This R markdown file contains all of the analyses for the KOSMOS paper. For any questions about the analysis, please contact me at mmin@uw.edu.
# Get world map
world_df <- map_data("world")
# pull out only peru
peru_df <- world_df[world_df$region == "Peru",]
# Load data
# peru_spdf <- readOGR(dsn = "/Users/mmin/Documents/KOSMOS_data_local/map_files/PER_adm/PER_adm0.shp")
peru_spdf <- readOGR(dsn = here::here("data", "map_files", "PER_adm", "PER_adm0.shp"))
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/markusmin/Documents/MBARI-2167/KOSMOS_eDNA_paper/KOSMOS_eDNA_paper/data/map_files/PER_adm/PER_adm0.shp", layer: "PER_adm0"
## with 1 features
## It has 70 fields
## Integer64 fields read as strings: ID_0 OBJECTID_1
# Convert to df(ish)
peru_spdf_fort <- tidy(peru_spdf)
## Regions defined for each Polygons
peru_map <- ggplot(data = world_df, aes(x = long, y = lat, group = group))+
geom_polygon(fill = "gray60", color = "grey40")+
geom_polygon(data = peru_df,aes(x = long, y = lat, group = group, fill = region), color = "black")+
scale_fill_manual(values = c("white"))+
coord_fixed(xlim = c(-90, -68), ylim = c(-29,1), ratio = 1.3)+
theme(legend.position = "none",
panel.background = element_rect(fill="#c6dbef"),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=2),
axis.text = element_text(size = 13,face = "bold"),
axis.title = element_text(size = 15, face = "bold"))+
ylab("Latitude")+
xlab("Longitude")+
scale_x_continuous(breaks = c(-90,-85,-80,-75,-70), expand = c(0,0), labels=c(expression(paste(90*degree,"W")),expression(paste(85*degree,"W")),
expression(paste(80*degree,"W")),expression(paste(75*degree,"W")),
expression(paste(70*degree,"W"))))+
scale_y_continuous(breaks = seq(-25,0,5), expand = c(0,0), labels=c(expression(paste(25*degree,"S")),
expression(paste(20*degree,"S")),expression(paste(15*degree,"S")),
expression(paste(10*degree,"S")),expression(paste(5*degree,"S")),expression(paste(0*degree))))+
annotate("rect", xmin = -77.6, xmax = -76.8, ymin = -12.5, ymax = -11.9,color="firebrick3", fill = NA)+
annotate("text",label = "Peru",x = -75.5, y = -10,size = 8)+
annotate("text", label = " La Punta \n (Callao) ",x = -86, y = -12.2, size = 8)+
geom_segment(aes(x = -83, y = -12.2, xend = -78, yend = -12.2), colour='black', size=1,arrow = arrow(length = unit(0.25, "cm"),type = "closed"))+
# callout to mesocosm setup
annotate("segment",x = -77.6, y = -12.5, xend = -87, yend = -15.5, color = "firebrick3", lty = 2)+
annotate("segment",x = -76.8, y = -12.5, xend = -78, yend = -15.5, color = "firebrick3", lty = 2)+
# mesocosm arrangement - manually because I stink at for loops
# Box
annotate("rect", xmin = -87, xmax = -78, ymin = -15.5, ymax = -27.5,color="firebrick3",fill=NA, lty = 3)+ #background
# Left row
annotate("point", x = -85, y = -17, shape = 21, fill = "white", size = 12, color = "firebrick3",stroke = 2)+ # M1
annotate("text", x = -85, y = -17,label = "M1", color = "black",size = 5)+ # M1
annotate("point", x = -85, y = -20, shape = 21, fill = "white", size = 12, stroke = 1, alpha = 0.5)+ # M3
annotate("text", x = -85, y = -20,label = "M3", color = "black", alpha = 0.5)+ # M3
annotate("point", x = -85, y = -23, shape = 21, fill = "white", size = 12, stroke = 1, alpha = 0.5)+ # M5
annotate("text", x = -85, y = -23,label = "M5", color = "black", alpha = 0.5)+ # M5
annotate("point", x = -85, y = -26, shape = 21, fill = "white", size = 12, stroke = 1, alpha = 0.5)+ # M7
annotate("text", x = -85, y = -26, label = "M7", color = "black", alpha = 0.5)+ # M7
# Right row
annotate("point", x = -80, y = -17, shape = 21, fill = "white", size = 12, stroke = 1, alpha = 0.5)+ # M2
annotate("text", x = -80, y = -17,label = "M2", color = "black", alpha = 0.5)+ # M2
annotate("point", x = -80, y = -20, shape = 21, fill = "white", size = 12, stroke = 1, alpha = 0.5)+ # M4
annotate("text", x = -80, y = -20,label = "M4", color = "black", alpha = 0.5)+ # M4
annotate("point", x = -80, y = -23, shape = 21, fill = "white", size = 12, stroke = 1, alpha = 0.5)+ # M6
annotate("text", x = -80, y = -23,label = "M6", color = "black", alpha = 0.5)+ # M6
annotate("point", x = -80, y = -26, shape = 21, fill = "white", size = 12, stroke = 1, alpha = 0.5)+ # M8
annotate("text", x = -80, y = -26, label = "M8", color = "black", alpha = 0.5)+ # M8
# Pacific
# annotate("point", x = -77.3175, y = -12.0875, shape = 21, fill = "white",color = "white", size = 15, stroke = 1)+ # Pacific
annotate("text", x = -82.5, y = -21.5,label = "bold(Pacific)", color = "darkblue",size = 6, parse = TRUE) # Pacific
peru_map
ggsave("kosmos_map.pdf", width = 5, height = 8,path = here::here("figures"))
M_nutrients <- read.csv(file = here::here("data", "kosmos_physical_data", "Figure_4_Francisco.csv"))
M_chla_long <- read.csv(file = here::here("data", "kosmos_physical_data", "chla_Francisco.csv"))
kosmos_T <- read.csv(file = here::here("data", "kosmos_physical_data", "kosmos_temperature_all.csv"))
kosmos_S <- read.csv(file = here::here("data", "kosmos_physical_data", "kosmos_salinity_all.csv"))
# Convert all five datasets into long form for plots
# Separate out long form data into each nutrient separately
NOx_long <- subset(M_nutrients, parameter %in% c("NOx"))
NOx_long %>% mutate(mesocosm = paste0("M",mesocosm)) %>% mutate(mesocosm = ifelse(mesocosm == "M10","Pacific",mesocosm)) -> NOx_long
SiO4_long <-subset(M_nutrients, parameter %in% c("SiO4"))
SiO4_long %>% mutate(mesocosm = paste0("M",mesocosm)) %>% mutate(mesocosm = ifelse(mesocosm == "M10","Pacific",mesocosm)) -> SiO4_long
rownames(M_chla_long) = NULL
chla_long <- M_chla_long
chla_long %>% mutate(mesocosm = paste0("M",mesocosm)) %>% mutate(mesocosm = ifelse(mesocosm == "M10","Pacific",mesocosm)) -> chla_long
T_long <- kosmos_T %>% gather(.,mesocosm, value, M1:Pacific)
parameter = rep("temp",nrow(T_long))
T_long %>% dplyr::rename(.,sampling.day = day) %>% data.frame(.,parameter) -> T_long
S_long <- kosmos_S %>% gather(.,mesocosm, value, M1:Pacific)
parameter = rep("sal",nrow(S_long))
S_long %>% dplyr::rename(.,sampling.day = day) %>% data.frame(.,parameter) -> S_long
# Concat the different variables, export as one dataframe for inclusion with publication
combined_physicochemical_data <- bind_rows(bind_rows(bind_rows(bind_rows(T_long,S_long),NOx_long),SiO4_long),chla_long)
write.csv(combined_physicochemical_data, here::here("data", "kosmos_physical_data", "Figure_2_data.csv"))
combined_physicochemical_data <- read.csv(here::here("data", "kosmos_physical_data", "Figure_2_data.csv"),row.names = 1)
combined_physicochemical_data$mesocosm <- as.character(combined_physicochemical_data$mesocosm)
combined_physicochemical_data$parameter <- as.character(combined_physicochemical_data$parameter)
T_long <- subset(combined_physicochemical_data,parameter == "temp")
S_long <- subset(combined_physicochemical_data,parameter == "sal")
NOx_long <- subset(combined_physicochemical_data,parameter == "NOx")
SiO4_long <- subset(combined_physicochemical_data,parameter == "SiO4")
chla_long <- subset(combined_physicochemical_data,parameter == "chla")
variable_names <- c("NOx","SiO4","chla","T","S")
for (i in seq(1:length(variable_names))){
df <- pivot_wider(eval(parse(text = paste0(variable_names[i],"_long"))),names_from = mesocosm, values_from = value)
colnames(df) <- c("sampling.day","parameter","M1","M2","M3","M4","M5","M6","M7","M8","Pacific")
# convert to long form
df_long <- df %>% gather(.,station, value, M1:M8)
# drop Pacific values
df_long <- df_long[,!(names(df_long) %in% c("Pacific"))]
# Sumamrize data to calculate the mean + SE
df_meanSE_data <- summarySE(df_long, measurevar="value", groupvars=c("sampling.day"),na.rm = TRUE)
# Add in M1 and Pacific to this data
complete_df <- left_join(df[,c("sampling.day", "M1","Pacific")],df_meanSE_data)
# Rename some columns
complete_df %>% dplyr::rename(., M1_M8_mean = value, M1_M8_sd = sd,M1_M8_se = se,M1_M8_ci = ci) -> complete_df
# Save as a dataframe
assign(paste0(variable_names[i],"_df"), complete_df)
}
## Joining, by = "sampling.day"
## Joining, by = "sampling.day"
## Joining, by = "sampling.day"
## Joining, by = "sampling.day"
## Joining, by = "sampling.day"
There are a number of “ifelse” statements in this chunk of code which are used to adjust the plot margins in order to align the plots, as well as make the top and bottom (a and e) figures have x-axis labels.
variable_names <- c("T","S","NOx","SiO4","chla")
panel = c("(a)","(b)","(c)","(d)","(e)")
for (i in seq(1:length(variable_names))){
plot <- ggplot(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = M1))+
geom_line(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = M1),color = "firebrick3",size = 1.25)+
geom_line(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = M1_M8_mean),color = "grey50",size = 1)+
geom_line(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = Pacific),color = "darkblue",size = 1.25)+
geom_point(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = Pacific),color = "black",size = 1)+
geom_ribbon(data = eval(parse(text = paste0(variable_names[i],"_df"))),aes(x = sampling.day,ymax = M1_M8_mean+M1_M8_se,ymin = M1_M8_mean-M1_M8_se),fill = "grey50",alpha = 0.5)+
geom_point(data = subset(eval(parse(text = paste0(variable_names[i],"_df"))),sampling.day %in% c(1,8,15,24,32,36,42,48)),aes(x = sampling.day, y = Pacific), color = "darkblue", shape = 21, fill = "white", size = 3.5, stroke = 2.5)+
geom_point(data = subset(eval(parse(text = paste0(variable_names[i],"_df"))),sampling.day %in% c(1,8,15,24,32,36,42,48)),aes(x = sampling.day, y = M1), color = "firebrick3", shape = 21, fill = "white", size = 3.5, stroke = 2.5)+
# Add points for sampling points
geom_point(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = M1),color = "black",size = 1.5)+
geom_point(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = Pacific),color = "black",size = 1.5)+
xlab("Day of Experiment")+
# Three different themes: for top panel, middle three panels, and bottom panel
theme(panel.background = element_rect(fill="white"),
panel.border = element_rect(colour = "black", fill=NA, size=2),
axis.text.y = element_text(size = 15,face = "bold"),
axis.text.x.bottom = element_text(size = ifelse(panel[i] %in% c("(e)"),20,0),face = "bold"),
axis.text.x.top = element_text(size = ifelse(panel[i] %in% c("(a)"),20,0),face = "bold"),
axis.title.x.bottom = element_text(size = ifelse(panel[i] %in% c("(e)"),25,0), face = "bold"),
axis.title.x.top = element_text(size = ifelse(panel[i] %in% c("(a)"),25,0), face = "bold"),
axis.title.y = element_text(size = 18, face = "bold",margin = margin(0, ifelse(panel[i] %in% c("(b)"), 1.7,
ifelse(panel[i] %in% c("(a)"), 11.75, 11.5)), 0,0)),
axis.ticks.length.x.bottom = unit(0.25,"cm"),
axis.ticks.length.x.top = unit(ifelse(panel[i] %in% c("(a)"),0.25,0),"cm"),
plot.margin = unit(c(ifelse(panel[i] == "(a)",0,5),0,0,3), "pt"),
legend.position = "none")+
ylim(ifelse(panel[i] %in% c("(c)","(d)","(e)"),0,min(eval(parse(text = paste0(variable_names[i],"_df")))$Pacific)),NA)+
# Add in secondary x axis and labels for temperature plot
scale_x_continuous(sec.axis = sec_axis(~ .,name = "Sample Number", breaks = c(1,8,15,24,32,36,42,48),labels = c(1,2,3,4,5,6,7,8)),limits = c(1,50))+
# geom_text(data = subset(eval(parse(text = paste0(variable_names[i],"_df"))), sampling.day %in% c(1,8,15,24,32,36,42,48)), aes(x = sampling.day, y = 21.28, label = sample_number), size = ifelse(panel[i] %in% c("(a)"),7,0), color = "black")+
annotate("text", x=-Inf, y = Inf, label = paste0(panel[i]), vjust=1.5, hjust=-0.3,size = 12)+
# add vertical lines for omz water addition
geom_vline(xintercept = 11, lty = 2, color = "seagreen", size = 1)+
geom_vline(xintercept = 12, lty = 2, color = "seagreen", size = 1)+
# add vertical lines for NaCl brine addition
geom_vline(xintercept = 13, lty = 2, color = "#ff7f00", size = 1)+
geom_vline(xintercept = 33, lty = 2, color = "#ff7f00", size = 1)+
# Use an ifelse statement to change the y labels
ylab(ifelse(panel[i] == "(a)",expression(bold(~"Temperature" ~ ("°C"))),
ifelse(panel[i] == "(b)",expression(bold(~"Salinity" ~ ("psu"))),
ifelse(panel[i] == "(c)",expression(bold(~"NO"[2] ~ + ~"NO"[3] ~ (mu*"mol L" ^-1)~"\n")),
ifelse(panel[i] == "(d)",expression(bold(~"Si(OH)"[4] ~ (mu*"mol L" ^-1)~"\n")),
ifelse(panel[i] == "(e)",expression(bold(~"chl-a" ~ (mu*"g L" ^-1) ~ "\n")),
"oops you messed up"))))))
ggsave(here::here("figures", "biogeochemical_figure", paste0(variable_names[i],"_gg.png")) ,plot,width = 7, height = ifelse(panel[i] %in% c("(a)","(e)"),3.5,3))
# show plot
plot
}
## Warning: Removed 2 row(s) containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
legend_gg <- ggplot(data = T_long, aes(x = Day, y = value, color = variable))+
xlim(30,38)+
ylim(20.23,20.67)+
theme(panel.background = element_rect(fill="white"),
panel.border = element_rect(colour = "black", fill=NA, size=2),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
legend.position = "none",
plot.margin = unit(c(0,0,0,58),"pt"))+
annotate("segment", x = 30, xend = 31, y = 20.55, yend = 20.55, color = "firebrick3",size = 1.5)+ #mesocosm
annotate("segment", x = 30, xend = 31, y = 20.35, yend = 20.35, color = "darkblue", size = 1.5)+ #pacific
annotate("segment", x = 34.5, xend = 35.5, y = 20.55, yend = 20.55, color = "grey50", size = 1.5)+ #M1-M8 average
annotate("segment", x = 34.5, xend = 35.5, y = 20.55, yend = 20.55, color = "grey50", size = 5,alpha = 0.5)+ #M1-M8 average SE
annotate("point", x = 35, y = 20.35, shape = 21, colour = "black", fill = "white", size = 7, stroke = 3)+ # eDNA sample
annotate("point", x = 35, y = 20.35, colour = "black", size = 1.5)+ # eDNA sample
annotate("text",label = "Mesocosm 1 (M1)", x = 31.5, y = 20.55,size = 6,adj = 0)+ #mesocosm
annotate("text",label = "Pacific", x = 31.5, y = 20.35, size = 6, adj = 0)+ #pacific
annotate("text",label = "Mean M1-M8", x = 36, y = 20.55, size = 6, adj = 0)+ #M1-M8 average
annotate("text",label = "eDNA Sample", x = 36, y = 20.35, size = 6, adj = 0) #eDNA sample
legend_gg
ggsave("legend_gg.png", width = 7, height = 1,path = here::here("figures"))
# Re-make, but with OMZ addition and NaCl brine additions in legend
legend_gg <- ggplot(data = T_long, aes(x = Day, y = value, color = variable))+
xlim(30,42.5)+
ylim(20.23,20.67)+
theme(panel.background = element_rect(fill="white"),
panel.border = element_rect(colour = "black", fill=NA, size=2),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
legend.position = "none",
plot.margin = unit(c(0,0,0,58),"pt"))+
annotate("segment", x = 30, xend = 31, y = 20.55, yend = 20.55, color = "firebrick3",size = 1.5)+ #mesocosm
annotate("segment", x = 30, xend = 31, y = 20.35, yend = 20.35, color = "darkblue", size = 1.5)+ #pacific
annotate("segment", x = 34.5, xend = 35.5, y = 20.55, yend = 20.55, color = "grey50", size = 1.5)+ #M1-M8 average
annotate("segment", x = 34.5, xend = 35.5, y = 20.55, yend = 20.55, color = "grey50", size = 5,alpha = 0.5)+ #M1-M8 average SE
annotate("point", x = 35, y = 20.35, shape = 21, colour = "black", fill = "white", size = 7, stroke = 3)+ # eDNA sample
annotate("point", x = 35, y = 20.35, colour = "black", size = 1.5)+ # eDNA sample
annotate("text",label = "Mesocosm 1 (M1)", x = 31.5, y = 20.55,size = 6,adj = 0)+ #mesocosm
annotate("text",label = "Pacific", x = 31.5, y = 20.35, size = 6, adj = 0)+ #pacific
annotate("text",label = "Mean M1-M8", x = 36, y = 20.55, size = 6, adj = 0)+ #M1-M8 average
annotate("text",label = "eDNA Sample", x = 36, y = 20.35, size = 6, adj = 0)+ #eDNA sample
# Add OMZ and NaCl brine additions to legend
annotate("segment", x = 38.5, xend = 39.8, y = 20.55, yend = 20.55, color = "seagreen", size = 1.5,lty = 2)+ #OMZ water addition
annotate("text", x = 40, y = 20.55, label = "OMZ water addition", size = 6, adj = 0)+ #OMZ water addition
annotate("segment", x = 38.5, xend = 39.8, y = 20.35, yend = 20.35, color = "#ff7f00", size = 1.5, lty = 2)+ #NaCl brine addition
annotate("text", x = 40, y = 20.35, label = "NaCl brine addition", size = 6, adj = 0) #NaCl brine addition
legend_gg
ggsave("legend_final.png", width = 11, height = 1,path = here::here("figures", "biogeochemical_figure"))
# FCM_df <- read.csv("/Users/mmin/Documents/KOSMOS_data_local/Worden_lab_data/FCM/KOSMOS2017_FCM_data_final_version_MM_edits.csv")
FCM_df <- read.csv(here::here("data", "Worden_lab_data", "FCM", "KOSMOS2017_FCM_data_final_version_MM_edits.csv"))
# Convert to long format (to achieve consistency with biogeochemical data format)
# Gather the five columns that we will plot
gathercols <- c("Synechococcus", "total_Eukaryotes", "cryptophytes", "heterotrophic.bacteria", "small_bacterial_population")
FCM_df_gather <- gather(dplyr::select(FCM_df,-c(Sample,Prochlorococcus,notes)), key = "parameter", value = "value", gathercols, factor_key=TRUE)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(gathercols)` instead of `gathercols` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
colnames(FCM_df_gather) <- c("mesocosm","sampling.day","parameter","value")
# Divide by 10000, so all values are in cells/ml x 10^4 (see Worden et al. 2004 for precedent)
FCM_df_gather$value <- FCM_df_gather$value/10000
# Take out only M1 and Pacific, drop DW
FCM_df_gather <- subset(FCM_df_gather, mesocosm %in% c("Pacific","Mesocosm 1"))
FCM_df_gather$mesocosm <- as.character(FCM_df_gather$mesocosm)
FCM_df_gather$parameter <- as.character(FCM_df_gather$parameter)
syn_long <- subset(FCM_df_gather,parameter == "Synechococcus")
euks_long <- subset(FCM_df_gather,parameter == "total_Eukaryotes")
cryp_long <- subset(FCM_df_gather,parameter == "cryptophytes")
het_bact_long <- subset(FCM_df_gather,parameter == "heterotrophic.bacteria")
small_bact_long <- subset(FCM_df_gather,parameter == "small_bacterial_population")
variable_names <- c("syn","euks","cryp","het_bact","small_bact")
for (i in seq(1:length(variable_names))){
df <- pivot_wider(eval(parse(text = paste0(variable_names[i],"_long"))),names_from = mesocosm, values_from = value)
# Remove first sampling point, because Alex and Bente don't trust it (issue with water handling or sampling)
df <- subset(df, sampling.day != 2)
colnames(df) <- c("sampling.day","parameter","Pacific","M1")
df$Pacific <- as.numeric(df$Pacific)
df$M1 <- as.numeric(df$M1)
assign(paste0(variable_names[i],"_df"), df)
}
euks_df
## # A tibble: 22 × 4
## sampling.day parameter Pacific M1
## <int> <chr> <dbl> <dbl>
## 1 4 total_Eukaryotes 0.486 0.586
## 2 6 total_Eukaryotes 0.434 0.707
## 3 8 total_Eukaryotes 0.520 0.442
## 4 10 total_Eukaryotes 0.491 0.170
## 5 12 total_Eukaryotes 0.500 0.265
## 6 15 total_Eukaryotes 1.25 0.514
## 7 16 total_Eukaryotes 0.903 0.688
## 8 17 total_Eukaryotes 0.763 0.613
## 9 22 total_Eukaryotes 0.296 0.529
## 10 26 total_Eukaryotes 0.727 0.91
## # … with 12 more rows
variable_names <- c("syn","euks","cryp","het_bact","small_bact")
panel = c("(f)","(g)","(h)","(i)","(j)")
for (i in seq(1:length(variable_names))){
df = eval(parse(text = paste0(variable_names[i],"_df")))
plot <- ggplot(data = df[!is.na(df$M1),], aes(x = sampling.day, y = M1))+
geom_line(data = df[!is.na(df$M1),], aes(x = sampling.day, y = M1),color = "firebrick3",size = 1.25)+
geom_line(data = df[!is.na(df$Pacific),], aes(x = sampling.day, y = Pacific),color = "darkblue",size = 1.25)+
geom_point(data = subset(df,sampling.day %in% c(1,8,15,24,32,36,42,48)),aes(x = sampling.day, y = Pacific), color = "darkblue", shape = 21, fill = "white", size = 3.5, stroke = 2.5)+
geom_point(data = subset(df,sampling.day %in% c(1,8,15,24,32,36,42,48)),aes(x = sampling.day, y = M1), color = "firebrick3", shape = 21, fill = "white", size = 3.5, stroke = 2.5)+
# Add points for sampling points
geom_point(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = M1),color = "black",size = 1.5)+
geom_point(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = Pacific),color = "black",size = 1.5)+
xlab("Day of Experiment")+
# Three different themes: for top panel, middle three panels, and bottom panel
theme(panel.background = element_rect(fill="white"),
panel.border = element_rect(colour = "black", fill=NA, size=2),
axis.text.y = element_text(size = 15,face = "bold"),
axis.text.x.bottom = element_text(size = ifelse(panel[i] %in% c("(j)"),20,0),face = "bold"),
axis.text.x.top = element_text(size = ifelse(panel[i] %in% c("(f)"),20,0),face = "bold"),
axis.title.x.bottom = element_text(size = ifelse(panel[i] %in% c("(j)"),25,0), face = "bold"),
axis.title.x.top = element_text(size = ifelse(panel[i] %in% c("(f)"),25,0), face = "bold"),
# axis.title.y = element_text(size = 18, face = "bold"),
axis.ticks.length.x.bottom = unit(0.25,"cm"),
axis.ticks.length.x.top = unit(ifelse(panel[i] %in% c("(f)"),0.25,0),"cm"),
plot.margin = unit(c(ifelse(panel[i] == "(f)",0,5),0,0,3), "pt"),
# plot.margin = unit(c(ifelse(panel[i] == "(f)",0,5),0,0,ifelse(panel[i] %in% c("(f)"),12.5,
# ifelse(panel[i] %in% c("(g)"),21,
# ifelse(panel[i] %in% c("(h)"),0,
# ifelse(panel[i] %in% c("(i)"),5,
# ifelse(panel[i] %in% c("(j)"),7.5,999999)))))), "pt"),
axis.title.y = element_text(size = 18, face = "bold",margin = margin(t = ifelse(panel[i] == "(f)",0,5),r = ifelse(panel[i] %in% c("(f)"),12.5,
ifelse(panel[i] %in% c("(g)"),21,
ifelse(panel[i] %in% c("(h)"),0,
ifelse(panel[i] %in% c("(i)"),5,
ifelse(panel[i] %in% c("(j)"),7.5,999999))))),b =0,l =0, "pt")),
legend.position = "none")+
xlim(1,50)+
# ylim(ifelse(panel[i] %in% c("(h)","(i)","(j)"),0,min(df$Pacific)),NA)+
ylim(0,ifelse(panel[i] %in% c("(f)"),45,
ifelse(panel[i] %in% c("(i)"),750, NA)))+
# Add in secondary x axis and labels for temperature plot
scale_x_continuous(sec.axis = sec_axis(~ .,name = "Sample Number", breaks = c(1,8,15,24,32,36,42,48),labels = c(1,2,3,4,5,6,7,8)),limits = c(1,50))+
# geom_text(data = subset(df, sampling.day %in% c(1,8,15,24,32,36,42,48)), aes(x = sampling.day, y = 21.28, label = sample_number), size = ifelse(panel[i] %in% c("(f)"),7,0), color = "black")+
annotate("text", x=-Inf, y = Inf, label = paste0(panel[i]), vjust=1.5, hjust=-0.3,size = 12)+
# add vertical lines for omz water addition
geom_vline(xintercept = 11, lty = 2, color = "seagreen", size = 1)+
geom_vline(xintercept = 12, lty = 2, color = "seagreen", size = 1)+
# add vertical lines for NaCl brine addition
geom_vline(xintercept = 13, lty = 2, color = "#ff7f00", size = 1)+
geom_vline(xintercept = 33, lty = 2, color = "#ff7f00", size = 1)+
# Use an ifelse statement to change the y labels
ylab(ifelse(panel[i] == "(f)",expression(bolditalic(~"Synechococcus")),
ifelse(panel[i] == "(g)",expression(bold(~"Total eukaryotes")),
ifelse(panel[i] == "(h)",expression(bold(~"Cryptophytes")),
ifelse(panel[i] == "(i)",expression(bold(~"Heterotrophic bacteria")),
ifelse(panel[i] == "(j)",expression(bold(~"Small bacteria")),
"oops you messed up"))))))
ggsave(here::here("figures", "biogeochemical_figure", paste0(variable_names[i],"_gg.png")),plot,width = 7, height = ifelse(panel[i] %in% c("(f)","(j)"),3.5,3))
}
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 1 rows containing missing values (geom_point).
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 1 rows containing missing values (geom_point).
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 1 rows containing missing values (geom_point).
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 1 rows containing missing values (geom_point).
## Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
# Make a "cells/ml" label
cells_ml_legend_gg <- ggplot(data = syn_df, aes(x = sampling.day, y = M1))+
geom_point()+
ylab(expression(bold(~"Cells ml" ^-1 ~ ("x 10"^4) ~ "\n")))+
theme(axis.title.y = element_text(size = 30))
ggsave(here::here("figures", "biogeochemical_figure", "cells_ml_legend.png"), cells_ml_legend_gg,width = 7,height = 7)
legend_gg <- ggplot(data = T_long, aes(x = Day, y = value, color = variable))+
xlim(30,38)+
ylim(20.23,20.67)+
theme(panel.background = element_rect(fill="white"),
panel.border = element_rect(colour = "black", fill=NA, size=2),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
legend.position = "none",
plot.margin = unit(c(0,0,0,58),"pt"))+
annotate("segment", x = 30, xend = 31, y = 20.55, yend = 20.55, color = "firebrick3",size = 1.5)+ #mesocosm
annotate("segment", x = 30, xend = 31, y = 20.35, yend = 20.35, color = "darkblue", size = 1.5)+ #pacific
annotate("segment", x = 34.5, xend = 35.5, y = 20.55, yend = 20.55, color = "grey50", size = 1.5)+ #M1-M8 average
annotate("segment", x = 34.5, xend = 35.5, y = 20.55, yend = 20.55, color = "grey50", size = 5,alpha = 0.5)+ #M1-M8 average SE
annotate("point", x = 35, y = 20.35, shape = 21, colour = "black", fill = "white", size = 7, stroke = 3)+ # eDNA sample
annotate("text",label = "Mesocosm 1 (M1)", x = 31.5, y = 20.55,size = 6,adj = 0)+ #mesocosm
annotate("text",label = "Pacific", x = 31.5, y = 20.35, size = 6, adj = 0)+ #pacific
annotate("text",label = "Mean M1-M8", x = 36, y = 20.55, size = 6, adj = 0)+ #M1-M8 average
annotate("text",label = "eDNA Sample", x = 36, y = 20.35, size = 6, adj = 0) #eDNA sample
legend_gg
ggsave("legend_gg.png", width = 7, height = 1,path = here::here("figures", "biogeochemical_figure"))
We will be reading in the master datasets from MBON and subsetting out only the KOSMOS project.
Please note: there are some code chunks that have “eval = FALSE” because they are used to subset the master datasets, which are too large to upload to GitHub.
metadata <- read.csv(file = "/Users/markusmin/Documents/MBARI-2167/local/eDNA_meta_analysis_local_data/banzai_dada2/COI/for_MBON/decontaminated/072720/COI_master_metadata.csv")
metadata$sample_name <- as.character(metadata$sample_name)
## Depth
# Change depth to character so that the mutate line will work
metadata$depth <- as.character(metadata$depth)
# Change surface to 1 m, bottom to 20 m for Florida samples
metadata %>% mutate(.,depth = ifelse(depth %in% c("Surface","SUR"),"1",
ifelse(depth %in% c("Bottom","BOT"),"20",depth))) -> metadata
metadata$depth <- as.numeric(as.character(metadata$depth))
# Flip the depths for the 67-60 samples in the C3PO19 cruise
metadata$depth <- as.numeric(as.character(metadata$depth))
metadata %>% mutate(.,depth = ifelse(metadata$sample_name %in% c("C3PO19c28_12_eDNA_GG"),150,
ifelse(metadata$sample_name %in% c("C3PO19c28_9_eDNA_GG"),100,
ifelse(metadata$sample_name %in% c("C3PO19c28_6_eDNA_GG"),60,
ifelse(metadata$sample_name %in% c("C3PO19c28_4_eDNA_GG"),20,
ifelse(metadata$sample_name %in% c("C3PO19c28_3_eDNA_GG"),0,depth)))))) -> metadata
# Change depth from factor to numeric
metadata$depth <- as.numeric(as.character(metadata$depth))
## Add in the depth metadata for the Hawaii + Aku samples - in email from Kris Walz on 4/24/20
metadata %>% mutate(depth = ifelse(sample_name == "HI300_J", 300,
ifelse(sample_name == "HI650_J", 650,
ifelse(sample_name == "HI900_J", 900, depth)))) -> metadata
# AKU samples
## Sampling device
netfilts <- grepl('netfilt|bongo|Bongo', metadata$sample_name)
netfilt_samples <- subset(metadata, metadata$sample_name %in% metadata$sample_name[netfilts])
netfilt_sample_names <- netfilt_samples$sample_name
metadata_updated <- metadata %>%
mutate(.,sampling_device = ifelse(sample_type %in% c("positive","negative","blank"),"CONTROL",
ifelse(sample_name %in% netfilt_sample_names,"Net filtrate",
ifelse(SAMPLING_platform %in% c("WESTERN FLYER", "RACHEL CARSON", "FULMAR", "WESTERNFLYER"," WESTERN FLYER", "POINT LOBOS", "Point Lobos",
"Point lobos", "Point Sur", "McArthur II", "JohnMartin","JOHN MARTIN","POINT SUR","rachel carson","point lobos","McArthur II", "Western Flyer", "Fulmar", "Rachel Carson", "Rachel carson","CPF","POINT 0"),"Shipboard Niskin",
ifelse(SAMPLING_platform %in% c("REUBEN LASKER"),"Net filtrate",
ifelse(SAMPLING_platform %in% c("DocRicketts"),"ROV",
ifelse(SAMPLING_platform %in% c("Mesocosm"),"Mesocosm",
ifelse(SAMPLING_platform %in% c("Tethys","MAKAI","DORADO","AKU","Dorado389"),"3G ESP",
ifelse(samp_collection_device %in% c("CTD/SBE911 Rosette with Niskin bottle"),"Shipboard Niskin",
ifelse(SAMPLE_Collection_Device %in% c("CTD/SBE911 Rosette with Niskin bottle","CTD/SBE911_Rosette_with_Niskin_bottle"),"Shipboard Niskin",
ifelse(samp_collection_device %in% c("SCUBA diver/bottle"),"SCUBA diver",
ifelse(samp_collection_device %in% c("Methot_net"),"Net filtrate",
ifelse(sample_name %in% c("CN18Fc27_bongo__GG","CN18Fc38_bongo__GG","CN18Fc43_bongo_GG"),"Net filtrate",
metadata$samp_collection_device)))))))))))))
#metadata_updated$sampling_device
## Project
metadata_updated$SAMPLING_cruise <- as.character(metadata_updated$SAMPLING_cruise)
metadata_updated$SAMPLING_campaign <- as.character(metadata_updated$SAMPLING_campaign)
metadata_updated$description <- as.character(metadata_updated$description)
metadata_updated$project_name <- as.character(metadata_updated$project_name)
metadata_updated %>% mutate(.,project_new = ifelse(SAMPLING_cruise %in% c("CANON11","CANON13","CANON15","CANON16","CANON17S","CN18S","CN18F","C3PO19","CN19S","Lasker2018","Flyer2018"),SAMPLING_cruise,
ifelse(!(description %in% c("")),description,
ifelse(!(SAMPLING_campaign %in% c("")), SAMPLING_campaign,
project_name)))) -> metadata_updated
## Location
metadata_updated$geo_loc_name <- as.character(metadata_updated$geo_loc_name)
## Depth as categorical
#sort(metadata_updated$depth)
metadata_updated %>% mutate(.,depth_cat = ifelse(metadata_updated$depth >= 0 & metadata_updated$depth <= 5,"0-5",
ifelse(metadata_updated$depth > 5 & metadata_updated$depth < 20,"6-19",
ifelse(metadata_updated$depth >= 20 & metadata_updated$depth < 29.5,"20-29",
ifelse(metadata_updated$depth >= 29.5 & metadata_updated$depth < 40,"30-39",
ifelse(metadata_updated$depth >= 40 & metadata_updated$depth < 60,"40-59",
ifelse(metadata_updated$depth >= 60 & metadata_updated$depth <= 80,"60-80",
ifelse(metadata_updated$depth >= 100 & metadata_updated$depth <= 199,"100-199",
ifelse(metadata_updated$depth > 199 & metadata_updated$depth <= 250,"200-250",
ifelse(metadata_updated$depth >= 280 & metadata_updated$depth <= 500,"280-500",
ifelse(metadata_updated$SAMPLING_platform %in% c("Mesocosm"),"0-5",
ifelse(is.na(metadata_updated$depth),"unknown",
"600-1565")))))))))))) -> metadata_updated
metadata_updated$depth_cat <- factor(metadata_updated$depth_cat,levels = c("0-5","6-19","20-29","30-39","40-59","60-80","100-199","200-250","280-500","600-1565"))
metadata_updated <- metadata_updated
# Use lubridate to convert date/time to posixct
metadata_updated$SAMPLING_date_time <- as.character(metadata_updated$SAMPLING_date_time)
metadata_updated %>% mutate(.,date_time_updated = parse_date_time(SAMPLING_date_time,c("ymdHM","mdyHM","ymdHMS"))) -> metadata_updated
class(metadata_updated$date_time_updated)
# Let's add in a month field, so we can try to visualize seasonality
metadata_updated %>% mutate(.,month_updated = month(date_time_updated,label = TRUE, abbr = FALSE)) -> metadata_updated
metadata_updated$month_updated <- as.character(metadata_updated$month_updated)
metadata_updated %>% mutate(.,month_updated = ifelse(!(is.na(month_updated)),month_updated,
ifelse(month %in% c("Jan"),"January",
ifelse(month %in% c("Feb"),"February",
ifelse(Month == "March" | month %in% c("Mar"),"March",
ifelse(month %in% c("Apr"),"April",
ifelse(Month == "May" | month %in% c("May"),"May",
ifelse(month %in% c("Jun"),"June",
ifelse(month %in% c("Jul"),"July",
ifelse(month %in% c("Aug"),"August",
ifelse(month %in% c("Sep"),"September",
ifelse(month %in% c("Oct"),"October",
ifelse(month %in% c("Nov"),"November",
ifelse(month %in% c("Dec"),"December",month_updated))))))))))))))-> metadata_updated
# Let's add a season field
metadata_updated %>% mutate(.,season = ifelse(month_updated %in% c("November","December","January"),"Winter",
ifelse(month_updated %in% c("February","March","April"),"Spring",
ifelse(month_updated %in% c("May","June","July"),"Summer",
ifelse(month_updated %in% c("August","September","October"),"Fall",month_updated))))) -> metadata_updated
metadata_updated$season <- factor(metadata_updated$season, levels = c("Winter","Spring","Summer","Fall"))
# Let's add a different season field - using the "conventional" seasons
metadata_updated %>% mutate(.,season_conv = ifelse(month_updated %in% c("January", "February", "March"),"Winter",
ifelse(month_updated %in% c("April", "May", "June"),"Spring",
ifelse(month_updated %in% c("July", "August", "September"),"Summer",
ifelse(month_updated %in% c("October", "November", "December"),"Fall",month_updated))))) -> metadata_updated
metadata_updated$season_conv <- factor(metadata_updated$season_conv, levels = c("Winter","Spring","Summer","Fall"))
metadata_updated$month_updated <- factor(metadata_updated$month_updated, levels = c("January", "February", "March", "April", "May", "June","July","August","September","October","November","December"))
# Let's add a year field
metadata_updated %>% mutate(.,year_updated = year(date_time_updated)) -> metadata_updated
# Drop 67-70 for the C3PO19 cruise - this is the cast where we the filters got mixed up
metadata_updated <- subset(metadata_updated, !(SAMPLING_station %in% c("67-70") & SAMPLING_cruise == "C3PO19"))
# Take out all non-environmental samples
metadata_updated <- subset(metadata_updated, sample_type %in% c("environmental"))
# Take out the non-eDNA samples from the Florida zooplankton study
metadata_updated <- subset(metadata_updated, !(type_sample %in% c("pre_filt","tissue","extr_blank","filt_blank")))
# Drop netfilt samples
metadata_updated <- subset(metadata_updated, !(sample_name %in% netfilt_sample_names))
# Move sample names to rownames for compatibility with DEICODE
metadata_updated <- metadata_updated[!(is.na(metadata_updated$sample_name)),]
rownames(metadata_updated) <- NULL
metadata_updated <- column_to_rownames(metadata_updated,"sample_name")
asv_table <- read.csv(file = "/Users/markusmin/Documents/MBARI-2167/local/eDNA_meta_analysis_local_data/banzai_dada2/COI/for_MBON/decontaminated/072720/COI_master_ASV_table.csv",row.names = 1, check.names = FALSE)
destroyX = function(es) {
f = es
for (col in c(1:ncol(f))){ #for each column in dataframe
if (startsWith(colnames(f)[col], "X") == TRUE) { #if starts with 'X' ..
colnames(f)[col] <- substr(colnames(f)[col], 2, 100) #get rid of it
}
}
assign(deparse(substitute(es)), f, inherits = TRUE) #assign corrected data to original name
}
asv_table <- destroyX(asv_table)
asv_table <- otu_table(asv_table,taxa_are_rows = TRUE)
sample_data <- sample_data(metadata_updated)
tax_table <- tax_table(as.matrix(read.csv(file = "/Users/markusmin/Documents/MBARI-2167/local/eDNA_meta_analysis_local_data/banzai_dada2/COI/for_MBON/decontaminated/072720/COI_master_tax_table.csv",row.names = 1)))
master_phyloseq_COI <- merge_phyloseq(asv_table,sample_data,tax_table)
## Decontaminate
# no humans
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Family != "Hominidae"
)
# no cows
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Family != "Bovidae"
)
# no pigs
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Family != "Suidae"
)
# no dogs
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Family != "Canidae"
)
# no chickens
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Genus != "Gallus"
)
# no turkeys
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Genus != "Meleagris"
)
# no cats
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Family != "Felidae"
)
# no rats!! and other rodents
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Order != "Rodentia"
)
# no rabbits
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Order != "Lagomorpha"
)
# no insects
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Class != "Insecta"
)
# no arachnids
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Class != "Arachnida"
)
master_phyloseq_COI <- prune_taxa(taxa_sums(master_phyloseq_COI) > 0,master_phyloseq_COI)
master_phyloseq_COI
# Subset out KOSMOS samples
kosmos_COI_phyloseq <- prune_samples(sample_data(master_phyloseq_COI)$SAMPLING_project %in% c("KOSMOS"),master_phyloseq_COI)
kosmos_COI_phyloseq <- prune_taxa(taxa_sums(kosmos_COI_phyloseq)>0,kosmos_COI_phyloseq)
# kosmos_COI <- left_join(kosmos_COI_asv_table, kosmos_COI_tax_table, by = "ASV")
# write.csv(kosmos_COI, "/Users/mmin/Documents/KOSMOS_data_local/kosmos_COI_061620.csv")
# EXPORT FILES FOR UPLOAD TO GITHUB
# ASV table
kosmos_COI_asv_table <- as.data.frame(as(otu_table(kosmos_COI_phyloseq),"matrix"))
rownames_to_column(kosmos_COI_asv_table,"ASV") -> kosmos_COI_asv_table
# tax table
kosmos_COI_tax_table <- as.data.frame(as(tax_table(kosmos_COI_phyloseq),"matrix"))
rownames_to_column(kosmos_COI_tax_table,"ASV") -> kosmos_COI_tax_table
# metadata
kosmos_COI_metadata <- as.data.frame(as.matrix(sample_data(kosmos_COI_phyloseq)))
rownames_to_column(kosmos_COI_metadata,"SampleID") -> kosmos_COI_metadata
# export three files
write.csv(kosmos_COI_asv_table, here::here("data", "kosmos_eDNA_data", "kosmos_COI_asv_table.csv"), row.names = FALSE)
write.csv(kosmos_COI_tax_table, here::here("data", "kosmos_eDNA_data", "kosmos_COI_tax_table.csv"), row.names = FALSE)
write.csv(kosmos_COI_metadata, here::here("data", "kosmos_eDNA_data", "kosmos_COI_metadata.csv"), row.names = FALSE)
# read data
kosmos_COI_asv_table <- read.csv(here::here("data", "kosmos_eDNA_data", "kosmos_COI_asv_table.csv"), row.names = 1)
kosmos_COI_tax_table <- read.csv(here::here("data", "kosmos_eDNA_data", "kosmos_COI_tax_table.csv"), row.names = 1)
kosmos_COI_metadata <- read.csv(here::here("data", "kosmos_eDNA_data", "kosmos_COI_metadata.csv"), row.names = 1)
# reformat and join as phyloseq object
asv_table <- otu_table(kosmos_COI_asv_table,taxa_are_rows = TRUE)
sample_data <- sample_data(kosmos_COI_metadata)
tax_table <- tax_table(as.matrix(kosmos_COI_tax_table))
kosmos_COI_phyloseq <- merge_phyloseq(asv_table,sample_data,tax_table)
# Subset only environmental samples (non-controls)
kosmos_COI_16_phyloseq <- prune_samples(sample_data(kosmos_COI_phyloseq)$SAMPLING_station_number %in% c("M1","MP"), kosmos_COI_phyloseq)
kosmos_COI_16_phyloseq <- prune_taxa(taxa_sums(kosmos_COI_16_phyloseq)>0,kosmos_COI_16_phyloseq)
metadata <- read.csv(file = "/Users/markusmin/Documents/MBARI-2167/local/eDNA_meta_analysis_local_data/banzai_dada2/18S/for_MBON/decontaminated/072720/18S_master_metadata.csv")
metadata$sample_name <- as.character(metadata$sample_name)
## Depth
# Change depth to character so that the mutate line will work
metadata$depth <- as.character(metadata$depth)
# Change surface to 1 m, bottom to 20 m for Florida samples
metadata %>% mutate(.,depth = ifelse(depth %in% c("Surface","SUR"),"1",
ifelse(depth %in% c("Bottom","BOT"),"20",depth))) -> metadata
metadata$depth <- as.numeric(as.character(metadata$depth))
# Flip the depths for the 67-60 samples in the C3PO19 cruise
metadata$depth <- as.numeric(as.character(metadata$depth))
metadata %>% mutate(.,depth = ifelse(metadata$sample_name %in% c("C3PO19c28_12_eDNA_HH"),150,
ifelse(metadata$sample_name %in% c("C3PO19c28_9_eDNA_HH"),100,
ifelse(metadata$sample_name %in% c("C3PO19c28_6_eDNA_HH"),60,
ifelse(metadata$sample_name %in% c("C3PO19c28_4_eDNA_HH"),20,
ifelse(metadata$sample_name %in% c("C3PO19c28_3_eDNA_HH"),0,depth)))))) -> metadata
## Sampling device
netfilts <- grepl('netfilt|bongo|Bongo', metadata$sample_name)
netfilt_samples <- metadata$sample_name[netfilts]
metadata_updated <- metadata %>%
mutate(.,sampling_device = ifelse(sample_type %in% c("positive","negative","blank"),"CONTROL",
ifelse(sample_name %in% netfilt_samples,"Net filtrate",
ifelse(SAMPLING_platform %in% c("WESTERN FLYER", "RACHEL CARSON", "FULMAR", "WESTERNFLYER"," WESTERN FLYER", "POINT LOBOS", "Point Lobos","Shimada",
"Point lobos", "Point Sur", "McArthur II", "JohnMartin","JOHN MARTIN","POINT SUR","rachel carson","point lobos","McArthur II", "Western Flyer", "Fulmar", "Rachel Carson", "Rachel carson","CPF","POINT 0"),"Shipboard Niskin",
ifelse(SAMPLING_platform %in% c("REUBEN LASKER"),"Net filtrate",
ifelse(SAMPLING_platform %in% c("DocRicketts"),"ROV",
ifelse(SAMPLING_platform %in% c("Mesocosm"),"Mesocosm",
ifelse(depth %in% c("SUR","BOT"),"Shipboard Niskin",
ifelse(SAMPLING_platform %in% c("Tethys","MAKAI","DORADO","AKU","Dorado"),"3G ESP",
ifelse(samp_collection_device %in% c("CTD/SBE911 Rosette with Niskin bottle"),"Shipboard Niskin",
ifelse(SAMPLE_Collection_Device %in% c("CTD/SBE911 Rosette with Niskin bottle"),"Shipboard Niskin",
ifelse(samp_collection_device %in% c("SCUBA diver/bottle"),"SCUBA diver",
ifelse(samp_collection_device %in% c("Methot_net"),"Net filtrate",
ifelse(sample_name %in% c("CN18Fc27_bongo_","CN18Fc38_bongo_","CN18Fc43_bongo"),"Net filtrate",
metadata$samp_collection_device))))))))))))))
# Change depth to numeric
metadata_updated$depth <- as.numeric(as.character(metadata_updated$depth))
## Project
metadata_updated$SAMPLING_cruise <- as.character(metadata_updated$SAMPLING_cruise)
metadata_updated$SAMPLING_campaign <- as.character(metadata_updated$SAMPLING_campaign)
metadata_updated$description <- as.character(metadata_updated$description)
metadata_updated$project_name <- as.character(metadata_updated$project_name)
metadata_updated$PROJECT <- as.character(metadata_updated$PROJECT)
metadata_updated %>% mutate(.,project_new = ifelse(SAMPLING_cruise %in% c("CANON11","CANON13","CANON15","CANON16","CANON17S","CN18S","CN18F","C3PO19","CN19S","Lasker2018","Flyer2018"),SAMPLING_cruise,
ifelse(!(description %in% c("")),description,
ifelse(!(SAMPLING_campaign %in% c("")), SAMPLING_campaign,
ifelse(!(PROJECT %in% c("")), PROJECT,
project_name))))) -> metadata_updated
## Location
metadata_updated$geo_loc_NAme <- as.character(metadata_updated$geo_loc_NAme)
metadata_updated$geo_loc_name <- as.character(metadata_updated$geo_loc_name)
metadata_updated %>% mutate(.,geo_loc_name = ifelse(geo_loc_name %in% c(""),geo_loc_NAme,geo_loc_name)) -> metadata_updated
# Fix the FKNMS geo_loc_name
metadata_updated %>% mutate(.,geo_loc_name = ifelse(depth %in% c("SUR","BOT"),"USA:Florida:Keys",geo_loc_name)) -> metadata_updated
metadata_updated %>% mutate(.,geo_loc_name = ifelse(libraryID %in% c("M"),"USA:Florida:Keys",geo_loc_name)) -> metadata_updated
# Add in depth for DW addition sample from KOSMOS
metadata_updated["KOSMOSD15_DW2_UU",]$depth <- 30
# Create categorical depth field
metadata_updated %>% mutate(.,depth_cat = ifelse(metadata_updated$depth >= 0 & metadata_updated$depth <= 5.5,"0-5",
ifelse(metadata_updated$depth > 5.5 & metadata_updated$depth <= 20,"6-20",
ifelse(metadata_updated$depth > 20 & metadata_updated$depth < 29.5,"20-29",
ifelse(metadata_updated$depth >= 29.5 & metadata_updated$depth < 40,"30-39",
ifelse(metadata_updated$depth >= 40 & metadata_updated$depth < 60,"40-59",
ifelse(metadata_updated$depth >= 60 & metadata_updated$depth <= 80,"60-80",
ifelse(metadata_updated$depth >= 100 & metadata_updated$depth <= 199,"100-199",
ifelse(metadata_updated$depth > 199 & metadata_updated$depth <= 250,"200-250",
ifelse(metadata_updated$depth >= 280 & metadata_updated$depth <= 700,"280-700",
ifelse(metadata_updated$SAMPLING_platform %in% c("Mesocosm"),"0-5",
"600-700"))))))))))) -> metadata_updated
# Use lubridate to convert date/time to posixct
# First consolidate all date/time information into one column
metadata_updated$collection_date <- as.character(metadata_updated$collection_date)
metadata_updated$SAMPLING_date_time <- as.character(metadata_updated$SAMPLING_date_time)
metadata_updated %>% mutate(., SAMPLING_date_time = ifelse(is.na(SAMPLING_date_time) | SAMPLING_date_time == "", collection_date, SAMPLING_date_time)) -> metadata_updated
metadata_updated %>% mutate(.,date_time_updated = parse_date_time(SAMPLING_date_time,c("ymdHM","mdyHM","ymdHMS"))) -> metadata_updated
# Let's add in a month field, so we can try to visualize seasonality
metadata_updated %>% mutate(.,month_updated = month(date_time_updated,label = TRUE, abbr = FALSE)) -> metadata_updated
metadata_updated$month_updated <- as.character(metadata_updated$month_updated)
# consolidate two month columns
metadata_updated$month <- as.character(metadata_updated$month)
metadata_updated$Month <- as.character(metadata_updated$Month)
metadata_updated %>% mutate(.,month = ifelse(is.na(month) | month == "",Month,month)) -> metadata_updated
metadata_updated %>% mutate(.,month_updated = ifelse(!(is.na(month_updated)),month_updated,
ifelse(month %in% c("Jan"),"January",
ifelse(month %in% c("Feb"),"February",
ifelse(month %in% c("Mar", "March"),"March",
ifelse(month %in% c("Apr"),"April",
ifelse(month %in% c("May"),"May",
ifelse(month %in% c("Jun"),"June",
ifelse(month %in% c("Jul"),"July",
ifelse(month %in% c("Aug"),"August",
ifelse(month %in% c("Sep"),"September",
ifelse(month %in% c("Oct"),"October",
ifelse(month %in% c("Nov"),"November",
ifelse(month %in% c("Dec"),"December",month_updated))))))))))))))-> metadata_updated
# Let's add a season field
metadata_updated %>% mutate(.,season = ifelse(month_updated %in% c("November","December","January"),"Winter",
ifelse(month_updated %in% c("February","March","April"),"Spring",
ifelse(month_updated %in% c("May","June","July"),"Summer",
ifelse(month_updated %in% c("August","September","October"),"Fall",month_updated))))) -> metadata_updated
metadata_updated$season <- factor(metadata_updated$season, levels = c("Winter","Spring","Summer","Fall"))
# Let's add a different season field - using the "conventional" seasons
metadata_updated %>% mutate(.,season_conv = ifelse(month_updated %in% c("January", "February", "March"),"Winter",
ifelse(month_updated %in% c("April", "May", "June"),"Spring",
ifelse(month_updated %in% c("July", "August", "September"),"Summer",
ifelse(month_updated %in% c("October", "November", "December"),"Fall",month_updated))))) -> metadata_updated
metadata_updated$season_conv <- factor(metadata_updated$season_conv, levels = c("Winter","Spring","Summer","Fall"))
metadata_updated$month_updated <- factor(metadata_updated$month_updated, levels = c("January", "February", "March", "April", "May", "June","July","August","September","October","November","December"))
# Fix the location for the FGNMS samples
metadata_updated %>% mutate(.,geo_loc_name = ifelse(metadata_updated$sample_name %in% c("MBON301_M","MBON302_M","MBON303_M","MBON304_M","MBON305_M","MBON306_M"),"USA:Texas:Flower Garden Banks",geo_loc_name)) -> metadata_updated
# Let's add a year field
metadata_updated %>% mutate(.,year_updated = year(date_time_updated)) -> metadata_updated
# Drop 67-70 for the C3PO19 cruise - this is the cast where the samples got mixed up
metadata_updated <- subset(metadata_updated, !(SAMPLING_station %in% c("67-70") & SAMPLING_cruise == "C3PO19"))
# Take out the non-eDNA samples from FK zooplankton study
metadata_updated <- subset(metadata_updated, !(type_sample %in% c("pre_filt","tissue","extr_blank","filt_blank")))
metadata_updated <- subset(metadata_updated, sample_type %in% c("environment","environmental"))
# Drop netfilt samples
metadata_updated <- subset(metadata_updated, !(sample_name %in% netfilt_sample_names))
metadata_updated <- subset(metadata_updated, !(sampling_device %in% c("Net filtrate")))
### Move sample names to rownames for export
metadata_updated <- metadata_updated[!(is.na(metadata_updated$sample_name)),]
rownames(metadata_updated) <- NULL
metadata_updated <- column_to_rownames(metadata_updated,"sample_name")
asv_table <- read.csv(file = "/Users/markusmin/Documents/MBARI-2167/local/eDNA_meta_analysis_local_data/banzai_dada2/18S/for_MBON/decontaminated/072720/18S_master_ASV_table.csv",row.names = 1, check.names = FALSE)
destroyX = function(es) {
f = es
for (col in c(1:ncol(f))){ #for each column in dataframe
if (startsWith(colnames(f)[col], "X") == TRUE) { #if starts with 'X' ..
colnames(f)[col] <- substr(colnames(f)[col], 2, 100) #get rid of it
}
}
assign(deparse(substitute(es)), f, inherits = TRUE) #assign corrected data to original name
}
asv_table <- destroyX(asv_table)
tax_table_df <- read.csv(file = "/Users/markusmin/Documents/MBARI-2167/local/eDNA_meta_analysis_local_data/banzai_dada2/18S/for_MBON/decontaminated/072720/18S_master_tax_table.csv")
tax_table_df$Kingdom <- as.character(tax_table_df$Kingdom)
tax_table_df$Phylum <- as.character(tax_table_df$Phylum)
tax_table_df$Class <- as.character(tax_table_df$Class)
tax_table_df$Order <- as.character(tax_table_df$Order)
tax_table_df$Family <- as.character(tax_table_df$Family)
tax_table_df$Genus <- as.character(tax_table_df$Genus)
tax_table_df$Species <- as.character(tax_table_df$Species)
# Mutate Class
tax_table_df %>% mutate(.,Class = ifelse(tax_table_df$Species %in% c("Acantharian sp. 6201"),"Acantharea",
ifelse(tax_table_df$Order %in% c("Diplonemea"), "Diplonemea",
ifelse(tax_table_df$Order %in% c("Choanoflagellata"),"Choanoflagellatea",
tax_table_df$Class)))) -> tax_table_df
tax_table_df <- column_to_rownames(tax_table_df,"ASV")
asv_table <- otu_table(asv_table,taxa_are_rows = TRUE)
sample_data <- sample_data(metadata_updated)
tax_table <- tax_table(as.matrix(tax_table_df))
master_phyloseq_18S <- merge_phyloseq(asv_table,sample_data,tax_table)
## Decontaminate
# no humans
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Family != "Hominidae"
)
# no cows
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Family != "Bovidae"
)
# no pigs
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Family != "Suidae"
)
# no dogs
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Family != "Canidae"
)
# no chickens
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Genus != "Gallus"
)
# no turkeys
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Genus != "Meleagris"
)
# no cats
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Family != "Felidae"
)
# no rats!! and other rodents
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Order != "Rodentia"
)
# no rabbits
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Order != "Lagomorpha"
)
master_phyloseq_18S <- prune_taxa(taxa_sums(master_phyloseq_18S) > 0,master_phyloseq_18S)
master_phyloseq_18S
# Subset out KOSMOS samples
kosmos_18S_phyloseq <- prune_samples(sample_data(master_phyloseq_18S)$SAMPLING_project %in% c("KOSMOS"),master_phyloseq_18S)
kosmos_18S_phyloseq <- prune_taxa(taxa_sums(kosmos_18S_phyloseq)>0,kosmos_18S_phyloseq)
# kosmos_18S <- left_join(kosmos_18S_asv_table, kosmos_18S_tax_table, by = "ASV")
# write.csv(kosmos_18S, "/Users/mmin/Documents/KOSMOS_data_local/kosmos_18S_061620.csv")
# EXPORT FILES FOR UPLOAD TO GITHUB
# ASV table
kosmos_18S_asv_table <- as.data.frame(as(otu_table(kosmos_18S_phyloseq),"matrix"))
rownames_to_column(kosmos_18S_asv_table,"ASV") -> kosmos_18S_asv_table
# tax table
kosmos_18S_tax_table <- as.data.frame(as(tax_table(kosmos_18S_phyloseq),"matrix"))
rownames_to_column(kosmos_18S_tax_table,"ASV") -> kosmos_18S_tax_table
# metadata
kosmos_18S_metadata <- as.data.frame(as.matrix(sample_data(kosmos_18S_phyloseq)))
rownames_to_column(kosmos_18S_metadata,"SampleID") -> kosmos_18S_metadata
# export three files
write.csv(kosmos_18S_asv_table, here::here("data", "kosmos_eDNA_data", "kosmos_18S_asv_table.csv"), row.names = FALSE)
write.csv(kosmos_18S_tax_table, here::here("data", "kosmos_eDNA_data", "kosmos_18S_tax_table.csv"), row.names = FALSE)
write.csv(kosmos_18S_metadata, here::here("data", "kosmos_eDNA_data", "kosmos_18S_metadata.csv"), row.names = FALSE)
# read data
kosmos_18S_asv_table <- read.csv(here::here("data", "kosmos_eDNA_data", "kosmos_18S_asv_table.csv"), row.names = 1)
kosmos_18S_tax_table <- read.csv(here::here("data", "kosmos_eDNA_data", "kosmos_18S_tax_table.csv"), row.names = 1)
kosmos_18S_metadata <- read.csv(here::here("data", "kosmos_eDNA_data", "kosmos_18S_metadata.csv"), row.names = 1)
# reformat and join as phyloseq object
asv_table <- otu_table(kosmos_18S_asv_table,taxa_are_rows = TRUE)
sample_data <- sample_data(kosmos_18S_metadata)
tax_table <- tax_table(as.matrix(kosmos_18S_tax_table))
kosmos_18S_phyloseq <- merge_phyloseq(asv_table,sample_data,tax_table)
# Subset only environmental samples (non-controls)
kosmos_18S_16_phyloseq <- prune_samples(sample_data(kosmos_18S_phyloseq)$SAMPLING_station_number %in% c("M1","MP"), kosmos_18S_phyloseq)
kosmos_18S_16_phyloseq <- prune_taxa(taxa_sums(kosmos_18S_16_phyloseq)>0,kosmos_18S_16_phyloseq)
metadata <- read.csv(file = "/Users/markusmin/Documents/MBARI-2167/local/eDNA_meta_analysis_local_data/banzai_dada2/12S/for_MBON/decontaminated/092120/12S_master_metadata.csv")
metadata$sample_name <- as.character(metadata$sample_name)
metadata <- subset(metadata, sample_type %in% c("environmental"))
# Drop 67-70 for the C3PO19 cruise
metadata <- subset(metadata, !(SAMPLING_station %in% c("67-70") & SAMPLING_cruise == "C3PO19"))
# Flip the depths for station 67-60 to fix them.
metadata$depth <- as.numeric(as.character(metadata$depth))
metadata %>% mutate(.,depth = ifelse(metadata$sample_name %in% c("C3PO19c28_12_eDNA_QQ"),150,
ifelse(metadata$sample_name %in% c("C3PO19c28_9_eDNA_QQ"),100,
ifelse(metadata$sample_name %in% c("C3PO19c28_6_eDNA_QQ"),60,
ifelse(metadata$sample_name %in% c("C3PO19c28_4_eDNA_QQ"),20,
ifelse(metadata$sample_name %in% c("C3PO19c28_3_eDNA_QQ"),0,depth)))))) -> metadata
# Drop 67-70
metadata <- subset(metadata, !(SAMPLING_station %in% c("67-70")))
## Create a new field for sampling device, since the metadata has different formats, but always has a SAMPLING_platform field that we can use to determine what the sampling device was.
# netfilts <- grepl('netfilt|bongo|Bongo', metadata$sample_name)
# netfilt_samples <- metadata$sample_name[netfilts]
netfilts <- grepl('netfilt|bongo|Bongo', metadata$sample_name)
netfilt_samples <- subset(metadata, metadata$sample_name %in% metadata$sample_name[netfilts])
netfilt_sample_names <- netfilt_samples$sample_name
metadata_updated <- metadata %>%
mutate(.,sampling_device = ifelse(sample_type %in% c("positive","negative","blank"),"CONTROL",
ifelse(sample_name %in% netfilt_sample_names,"Net filtrate",
ifelse(SAMPLING_platform %in% c("WESTERN FLYER","MCARTHUR II", "RACHEL CARSON", "FULMAR", "WESTERNFLYER"," WESTERN FLYER", "POINT LOBOS", "Point Lobos","Point lobos", "Point Sur", "McArthur II", "JohnMartin","JOHN MARTIN","POINT SUR","rachel carson","point lobos","McArthur II", "Western Flyer", "Fulmar", "Rachel Carson", "Rachel carson","CPF","POINT 0"),"Shipboard Niskin",
ifelse(SAMPLING_platform %in% c("REUBEN LASKER"),"Net filtrate",
ifelse(SAMPLING_cruise %in% c("Flyer2018", "Lasker2018"),"Shipboard Niskin",
ifelse(SAMPLING_platform %in% c("DocRicketts"),"ROV",
ifelse(SAMPLING_platform %in% c("Mesocosm"),"Mesocosm",
ifelse(SAMPLING_platform %in% c("SCUBA") | SAMPLING_platform_type %in% c("scuba"),"SCUBA",
ifelse(SAMPLING_platform %in% c("Tethys","MAKAI","DORADO","AKU"),"3G ESP",metadata$sample_type))))))))))
metadata_updated$SAMPLING_cruise <- as.character(metadata_updated$SAMPLING_cruise)
metadata_updated$SAMPLING_campaign <- as.character(metadata_updated$SAMPLING_campaign)
metadata_updated$project_name <- as.character(metadata_updated$project_name)
metadata_updated %>% mutate(.,project_new = ifelse(SAMPLING_cruise %in% c("CANON11","CANON13","CANON15","CANON16","CANON17S","CN12S","CN18F","C3PO19","CN19S","Lasker2018","Flyer2018"),SAMPLING_cruise,
ifelse(!(SAMPLING_campaign %in% c("")), SAMPLING_campaign,
project_name))) -> metadata_updated
## Project
metadata_updated$SAMPLING_cruise <- as.character(metadata_updated$SAMPLING_cruise)
metadata_updated$SAMPLING_campaign <- as.character(metadata_updated$SAMPLING_campaign)
metadata_updated$project_name <- as.character(metadata_updated$project_name)
metadata_updated %>% mutate(.,project_new = ifelse(SAMPLING_cruise %in% c("CANON11","CANON13","CANON15","CANON16","CANON17S","CN12S","CN18F","C3PO19","CN19S","Lasker2018","Flyer2018"),SAMPLING_cruise,
ifelse(!(SAMPLING_campaign %in% c("")), SAMPLING_campaign,
project_name))) -> metadata_updated
## Location
metadata_updated$geo_loc_name <- as.character(metadata_updated$geo_loc_name)
# Drop netfilt samples
metadata_updated <- subset(metadata_updated, !(sample_name %in% netfilt_sample_names))
metadata_updated <- subset(metadata_updated, !(sampling_device %in% c("Net filtrate")))
## Depth as categorical
# sort(metadata_updated$depth)
metadata_updated %>% mutate(.,depth_cat = ifelse(metadata_updated$depth >= 0 & metadata_updated$depth <= 5.5,"0-5",
ifelse(metadata_updated$depth > 5.5 & metadata_updated$depth <= 20,"6-20",
ifelse(metadata_updated$depth > 20 & metadata_updated$depth < 29.5,"20-29",
ifelse(metadata_updated$depth >= 29.5 & metadata_updated$depth < 40,"30-39",
ifelse(metadata_updated$depth >= 40 & metadata_updated$depth < 60,"40-59",
ifelse(metadata_updated$depth >= 60 & metadata_updated$depth <= 80,"60-80",
ifelse(metadata_updated$depth >= 100 & metadata_updated$depth <= 199,"100-199",
ifelse(metadata_updated$depth > 199 & metadata_updated$depth <= 250,"200-250",
ifelse(metadata_updated$depth >= 280 & metadata_updated$depth <= 500,"280-500",
ifelse(metadata_updated$depth >= 600 & metadata_updated$depth <= 700,"600-700",
"unknown"))))))))))) -> metadata_updated
metadata_updated$depth_cat <- factor(metadata_updated$depth_cat,levels = c("0-5","6-20","20-29","30-39","40-59","60-80","100-199","200-250","280-500","600-700"))
# Use lubridate to convert date/time to posixct
metadata_updated$SAMPLING_date_time <- as.character(metadata_updated$SAMPLING_date_time)
metadata_updated %>% mutate(.,date_time_updated = parse_date_time(SAMPLING_date_time,c("ymdHM","mdyHM","ymdHMS"))) -> metadata_updated
class(metadata_updated$date_time_updated)
# Let's add in a month field, so we can try to visualize seasonality
metadata_updated %>% mutate(.,month_updated = month(date_time_updated,label = TRUE, abbr = FALSE)) -> metadata_updated
metadata_updated$month_updated <- as.character(metadata_updated$month_updated)
# Let's add a season field
metadata_updated %>% mutate(.,season = ifelse(month_updated %in% c("November","December","January"),"Winter",
ifelse(month_updated %in% c("February","March","April"),"Spring",
ifelse(month_updated %in% c("May","June","July"),"Summer",
ifelse(month_updated %in% c("August","September","October"),"Fall",month_updated))))) -> metadata_updated
metadata_updated$season <- factor(metadata_updated$season, levels = c("Winter","Spring","Summer","Fall"))
metadata_updated$month_updated <- factor(metadata_updated$month_updated, levels = c("January", "February", "March", "April", "May", "June","July","August","September","October","November","December"))
# Let's add a year field
metadata_updated %>% mutate(.,year_updated = year(date_time_updated)) -> metadata_updated
metadata_updated <- metadata_updated[!(is.na(metadata_updated$sample_name)),]
# Rearrange so that the touchdown replicates are first, and then drop the classic replicates
metadata_updated %>% dplyr::arrange(.,desc(PCR_settings)) %>% distinct(.,original_name,.keep_all = TRUE) -> metadata_updated
# Change sample_name to row names
rownames(metadata_updated) <- NULL
metadata_updated <- column_to_rownames(metadata_updated,"sample_name")
asv_table <- read.csv(file = "/Users/markusmin/Documents/MBARI-2167/local/eDNA_meta_analysis_local_data/banzai_dada2/12S/for_MBON/decontaminated/092120/12S_master_ASV_table.csv",row.names = 1, check.names = FALSE)
destroyX = function(es) {
f = es
for (col in c(1:ncol(f))){ #for each column in dataframe
if (startsWith(colnames(f)[col], "X") == TRUE) { #if starts with 'X' ..
colnames(f)[col] <- substr(colnames(f)[col], 2, 100) #get rid of it
}
}
assign(deparse(substitute(es)), f, inherits = TRUE) #assign corrected data to original name
}
asv_table <- destroyX(asv_table)
asv_table <- otu_table(asv_table,taxa_are_rows = TRUE)
# rownames(metadata_updated) <- NULL
# metadata_updated <- column_to_rownames(metadata_updated,"sample_name")
sample_data <- sample_data(metadata_updated)
tax_table <- tax_table(as.matrix(read.csv(file = "/Users/markusmin/Documents/MBARI-2167/local/eDNA_meta_analysis_local_data/banzai_dada2/12S/for_MBON/decontaminated/092120/12S_master_tax_table.csv",row.names = 1)))
master_phyloseq_allplates <- merge_phyloseq(asv_table,sample_data,tax_table)
master_phyloseq_allplates <- prune_samples(sample_data(master_phyloseq_allplates)$libraryID != "II",master_phyloseq_allplates)
master_phyloseq_allplates <- prune_samples(!(sample_data(master_phyloseq_allplates)$libraryID == "QQ" & sample_data(master_phyloseq_allplates)$PCR_settings == "classic"),master_phyloseq_allplates)
nsamples(master_phyloseq_allplates)
ntaxa(master_phyloseq_allplates)
sum(sample_sums(master_phyloseq_allplates))
#phyloseq_species <- tax_glom(master_phyloseq,taxrank=rank_names(phyloseq)[7], NArm = FALSE)
master_phyloseq_12S <- master_phyloseq_allplates %>%
subset_taxa(
!(Kingdom %in% c("Archaea","Bacteria"))
)
## Decontaminate
# no humans
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Family != "Hominidae"
)
# no cows
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Family != "Bovidae"
)
# no pigs
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Family != "Suidae"
)
# no dogs
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Family != "Canidae"
)
# no chickens
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Genus != "Gallus"
)
# no turkeys
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Genus != "Meleagris"
)
# no cats
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Order != "Carnivora"
)
# no rats!! and other rodents
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Family != "Felidae"
)
# no rabbits
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Order != "Lagomorpha"
)
# Subset out KOSMOS samples
kosmos_12S_phyloseq <- prune_samples(sample_data(master_phyloseq_12S)$SAMPLING_project %in% c("KOSMOS"),master_phyloseq_12S)
kosmos_12S_phyloseq <- prune_taxa(taxa_sums(kosmos_12S_phyloseq)>0,kosmos_12S_phyloseq)
# kosmos_12S <- left_join(kosmos_12S_asv_table, kosmos_12S_tax_table, by = "ASV")
# write.csv(kosmos_12S, "/Users/mmin/Documents/KOSMOS_data_local/kosmos_12S_061620.csv")
# EXPORT FILES FOR UPLOAD TO GITHUB
# ASV table
kosmos_12S_asv_table <- as.data.frame(as(otu_table(kosmos_12S_phyloseq),"matrix"))
rownames_to_column(kosmos_12S_asv_table,"ASV") -> kosmos_12S_asv_table
# tax table
kosmos_12S_tax_table <- as.data.frame(as(tax_table(kosmos_12S_phyloseq),"matrix"))
rownames_to_column(kosmos_12S_tax_table,"ASV") -> kosmos_12S_tax_table
# metadata
kosmos_12S_metadata <- as.data.frame(as.matrix(sample_data(kosmos_12S_phyloseq)))
rownames_to_column(kosmos_12S_metadata,"SampleID") -> kosmos_12S_metadata
# export three files
write.csv(kosmos_12S_asv_table, here::here("data", "kosmos_eDNA_data", "kosmos_12S_asv_table.csv"), row.names = FALSE)
write.csv(kosmos_12S_tax_table, here::here("data", "kosmos_eDNA_data", "kosmos_12S_tax_table.csv"), row.names = FALSE)
write.csv(kosmos_12S_metadata, here::here("data", "kosmos_eDNA_data", "kosmos_12S_metadata.csv"), row.names = FALSE)
# read data
kosmos_12S_asv_table <- read.csv(here::here("data", "kosmos_eDNA_data", "kosmos_12S_asv_table.csv"), row.names = 1)
kosmos_12S_tax_table <- read.csv(here::here("data", "kosmos_eDNA_data", "kosmos_12S_tax_table.csv"), row.names = 1)
kosmos_12S_metadata <- read.csv(here::here("data", "kosmos_eDNA_data", "kosmos_12S_metadata.csv"), row.names = 1)
# reformat and join as phyloseq object
asv_table <- otu_table(kosmos_12S_asv_table,taxa_are_rows = TRUE)
sample_data <- sample_data(kosmos_12S_metadata)
tax_table <- tax_table(as.matrix(kosmos_12S_tax_table))
kosmos_12S_phyloseq <- merge_phyloseq(asv_table,sample_data,tax_table)
# Subset only environmental samples (non-controls)
kosmos_12S_16_phyloseq <- prune_samples(sample_data(kosmos_12S_phyloseq)$SAMPLING_station_number %in% c("M1","MP"), kosmos_12S_phyloseq)
kosmos_12S_16_phyloseq <- prune_taxa(taxa_sums(kosmos_12S_16_phyloseq)>0,kosmos_12S_16_phyloseq)
chloroplast_asv <- read.csv(here::here("data", "Worden_lab_data", "kosmos_chloroplast_asv_table.csv"),row.names = 1)
rownames(chloroplast_asv) <- NULL
chloroplast_asv %>% column_to_rownames(.,"asv") -> chloroplast_asv
kosmos_16S_metadata <- read.csv(here::here("data", "Worden_lab_data", "KOSMOS_sample_data_16S.csv"),row.names = 1)
chloroplast_tax_table <- read.csv(here::here("data", "Worden_lab_data", "kosmos_chloroplast_tax_table_noX.csv"), row.names = 1)
rownames(chloroplast_tax_table) <- NULL
chloroplast_tax_table %>% column_to_rownames(.,"asv") -> chloroplast_tax_table
chloroplast_phyloseq <- merge_phyloseq(otu_table(chloroplast_asv,taxa_are_rows = TRUE),tax_table(as.matrix(chloroplast_tax_table)),sample_data(kosmos_16S_metadata))
# Subset only M1 and Pacific samples
chloroplast_16_phyloseq <- prune_samples(sample_data(chloroplast_phyloseq)$SAMPLING_station_number %in% c("M1","MP"), chloroplast_phyloseq)
# Drop all taxa with zero reads
chloroplast_16_phyloseq <- prune_taxa(taxa_sums(chloroplast_16_phyloseq)>0,chloroplast_16_phyloseq)
bacteria_tax_table <- read.csv(here::here("data", "Worden_lab_data", "kosmos_bacteria_tax_table.csv"), row.names = 1)
bacteria_tax_table$Kingdom <- as.character(bacteria_tax_table$Kingdom)
bacteria_tax_table$Phylum <- as.character(bacteria_tax_table$Phylum)
bacteria_tax_table$Class <- as.character(bacteria_tax_table$Class)
bacteria_tax_table$Order <- as.character(bacteria_tax_table$Order)
bacteria_tax_table$Family <- as.character(bacteria_tax_table$Family)
bacteria_tax_table$Genus <- as.character(bacteria_tax_table$Genus)
bacteria_tax_table$Species <- as.character(bacteria_tax_table$Species)
# Use a for loop to change all unwanted values to blank
unwanted_taxonomy <- c("uncultured","ambiguous","unidentified","metagenome")
columns <- c("Kingdom","Phylum","Class","Order","Family","Genus","Species")
for (i in seq(1,length(unwanted_taxonomy))){
bacteria_tax_table$Species[grep(unwanted_taxonomy[i], bacteria_tax_table$Species,ignore.case = TRUE)] <- ""
bacteria_tax_table$Genus[grep(unwanted_taxonomy[i], bacteria_tax_table$Genus,ignore.case = TRUE)] <- ""
bacteria_tax_table$Family[grep(unwanted_taxonomy[i], bacteria_tax_table$Family,ignore.case = TRUE)] <- ""
bacteria_tax_table$Order[grep(unwanted_taxonomy[i], bacteria_tax_table$Order,ignore.case = TRUE)] <- ""
bacteria_tax_table$Class[grep(unwanted_taxonomy[i], bacteria_tax_table$Class,ignore.case = TRUE)] <- ""
bacteria_tax_table$Phylum[grep(unwanted_taxonomy[i], bacteria_tax_table$Phylum,ignore.case = TRUE)] <- ""
bacteria_tax_table$Kingdom[grep(unwanted_taxonomy[i], bacteria_tax_table$Kingdom,ignore.case = TRUE)] <- ""
}
bacteria_asv <- read.csv(here::here("data", "Worden_lab_data", "kosmos_bacteria_asv_table.csv"),row.names = 1)
# bacteria_asv %>% column_to_rownames(.,"asv") -> bacteria_asv
kosmos_16S_metadata <- read.csv(here::here("data", "Worden_lab_data", "KOSMOS_sample_data_16S.csv"),row.names = 1)
# bacteria_tax_table <- read.csv("/Users/mmin/Documents/KOSMOS_data_local/Worden_lab_data/kosmos_bacteria_tax_table.csv", row.names = 1)
bacteria_phyloseq <- merge_phyloseq(otu_table(bacteria_asv,taxa_are_rows = TRUE),tax_table(as.matrix(bacteria_tax_table)),sample_data(kosmos_16S_metadata))
# Subset out only M1 and Pacific samples
bacteria_16_phyloseq <- prune_samples(sample_data(bacteria_phyloseq)$SAMPLING_station_number %in% c("M1","MP"), bacteria_phyloseq)
# Remove all taxa that have zero reads
bacteria_16_phyloseq <- prune_taxa(taxa_sums(bacteria_16_phyloseq) > 0,bacteria_16_phyloseq)
bacteria_16_phyloseq_pro <- subset_taxa(bacteria_16_phyloseq, Genus == "Prochlorococcus_MIT9313")
bacteria_16_phyloseq_syn <- subset_taxa(bacteria_16_phyloseq, Genus %in% c("Synechococcus_PCC-7902","Synechococcus_CC9902"))
sample_sums(bacteria_16_phyloseq_pro)
## KOSMOS.D1.M1BS KOSMOS.D1.MPBS KOSMOS.D15.M1AS KOSMOS.D15.MPAS KOSMOS.D24.M1A
## 61 191 57 309 29
## KOSMOS.D24.MPAS KOSMOS.D32.M1AS KOSMOS.D32.MPAS KOSMOS.D36.M1AS KOSMOS.D36.MPAS
## 21 33 88 25 57
## KOSMOS.D42.M1AS KOSMOS.D42.MPAS KOSMOS.D48.M1AS KOSMOS.D48.MPAS KOSMOS.D8.M1AS
## 0 286 11 145 25
## KOSMOS.D8.MPAS
## 2577
sample_sums(bacteria_16_phyloseq_syn)
## KOSMOS.D1.M1BS KOSMOS.D1.MPBS KOSMOS.D15.M1AS KOSMOS.D15.MPAS KOSMOS.D24.M1A
## 23446 16508 1410 5798 760
## KOSMOS.D24.MPAS KOSMOS.D32.M1AS KOSMOS.D32.MPAS KOSMOS.D36.M1AS KOSMOS.D36.MPAS
## 2445 485 2013 112 3069
## KOSMOS.D42.M1AS KOSMOS.D42.MPAS KOSMOS.D48.M1AS KOSMOS.D48.MPAS KOSMOS.D8.M1AS
## 121 1825 609 2120 983
## KOSMOS.D8.MPAS
## 10254
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_COI_16_phyloseq)
ntaxa <- ntaxa(kosmos_COI_16_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_COI_16_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_COI_16_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unknown", "unassigned", and "no_hit", thus now -4
ngenera <- length(unique(tax_table_all.df$Genus)) - 4
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, thus now -3
nspecies <- length(unique(tax_table_all.df$Species)) - 3
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_COI <- data.frame(statistic_names,statistic_values)
overview_statistics_COI
## statistic_names statistic_values
## 1 Number of Samples 16
## 2 Number of ASVs 2527
## 3 Number of Reads 1236508
## 4 Phyla 30
## 5 Classes 67
## 6 Orders 144
## 7 Families 228
## 8 Genera 72
## 9 Species 58
COI_phyla <- unique(tax_table_all.df$Phylum)
COI_genera <- unique(tax_table_all.df$Genus)
kosmos_COI_M1_phyloseq <- prune_samples(sample_data(kosmos_COI_16_phyloseq)$SAMPLING_station_number == "M1", kosmos_COI_16_phyloseq)
kosmos_COI_M1_phyloseq <- prune_taxa(taxa_sums(kosmos_COI_M1_phyloseq)>0,kosmos_COI_M1_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_COI_M1_phyloseq)
ntaxa <- ntaxa(kosmos_COI_M1_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_COI_M1_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_COI_M1_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unknown", "unassigned", and "no_hit", thus now -4
ngenera <- length(unique(tax_table_all.df$Genus)) - 4
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, thus now -3
nspecies <- length(unique(tax_table_all.df$Species)) - 3
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_COI_M1 <- data.frame(statistic_names,statistic_values)
overview_statistics_COI_M1
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 1784
## 3 Number of Reads 635388
## 4 Phyla 25
## 5 Classes 57
## 6 Orders 117
## 7 Families 166
## 8 Genera 48
## 9 Species 41
# Calculate mean + SD
kosmos_COI_M1_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(kosmos_COI_M1_phyloseq))))
kosmos_COI_M1_sample_sums <- rownames_to_column(kosmos_COI_M1_sample_sums,"sample")
colnames(kosmos_COI_M1_sample_sums)[2] <- "reads"
summarySE(kosmos_COI_M1_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 79423.5 22805.56 8062.984 19065.93
kosmos_COI_MP_phyloseq <- prune_samples(sample_data(kosmos_COI_16_phyloseq)$SAMPLING_station_number == "MP", kosmos_COI_16_phyloseq)
kosmos_COI_MP_phyloseq <- prune_taxa(taxa_sums(kosmos_COI_MP_phyloseq)>0,kosmos_COI_MP_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_COI_MP_phyloseq)
ntaxa <- ntaxa(kosmos_COI_MP_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_COI_MP_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_COI_MP_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unknown", "unassigned", and "no_hit", thus now -4
ngenera <- length(unique(tax_table_all.df$Genus)) - 4
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, thus now -3
nspecies <- length(unique(tax_table_all.df$Species)) - 3
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_COI_MP <- data.frame(statistic_names,statistic_values)
overview_statistics_COI_MP
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 1961
## 3 Number of Reads 601120
## 4 Phyla 28
## 5 Classes 61
## 6 Orders 128
## 7 Families 195
## 8 Genera 57
## 9 Species 49
# Calculate mean + SD
kosmos_COI_MP_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(kosmos_COI_MP_phyloseq))))
kosmos_COI_MP_sample_sums <- rownames_to_column(kosmos_COI_MP_sample_sums,"sample")
colnames(kosmos_COI_MP_sample_sums)[2] <- "reads"
summarySE(kosmos_COI_MP_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 75140 6205.367 2193.929 5187.817
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_18S_16_phyloseq)
ntaxa <- ntaxa(kosmos_18S_16_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_18S_16_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_18S_16_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unknown", "unassigned", and "no_hit", thus now -4
ngenera <- length(unique(tax_table_all.df$Genus)) - 4
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, thus now -3
nspecies <- length(unique(tax_table_all.df$Species)) - 3
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_18S <- data.frame(statistic_names,statistic_values)
overview_statistics_18S
## statistic_names statistic_values
## 1 Number of Samples 16
## 2 Number of ASVs 3296
## 3 Number of Reads 1759671
## 4 Phyla 41
## 5 Classes 95
## 6 Orders 183
## 7 Families 254
## 8 Genera 269
## 9 Species 262
phyla_18S <- unique(tax_table_all.df$Phylum)
genera_18S <- unique(tax_table_all.df$Genus)
kosmos_18S_M1_phyloseq <- prune_samples(sample_data(kosmos_18S_16_phyloseq)$SAMPLING_station_number == "M1", kosmos_18S_16_phyloseq)
kosmos_18S_M1_phyloseq <- prune_taxa(taxa_sums(kosmos_18S_M1_phyloseq)>0,kosmos_18S_M1_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_18S_M1_phyloseq)
ntaxa <- ntaxa(kosmos_18S_M1_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_18S_M1_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_18S_M1_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unknown", "unassigned", and "no_hit", thus now -4
ngenera <- length(unique(tax_table_all.df$Genus)) - 4
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, thus now -3
nspecies <- length(unique(tax_table_all.df$Species)) - 3
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_18S_M1 <- data.frame(statistic_names,statistic_values)
overview_statistics_18S_M1
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 2105
## 3 Number of Reads 1006473
## 4 Phyla 31
## 5 Classes 76
## 6 Orders 139
## 7 Families 189
## 8 Genera 214
## 9 Species 196
# Calculate mean + SD
kosmos_18S_M1_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(kosmos_18S_M1_phyloseq))))
kosmos_18S_M1_sample_sums <- rownames_to_column(kosmos_18S_M1_sample_sums,"sample")
colnames(kosmos_18S_M1_sample_sums)[2] <- "reads"
summarySE(kosmos_18S_M1_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 125809.1 26365.08 9321.464 22041.76
kosmos_18S_MP_phyloseq <- prune_samples(sample_data(kosmos_18S_16_phyloseq)$SAMPLING_station_number == "MP", kosmos_18S_16_phyloseq)
kosmos_18S_MP_phyloseq <- prune_taxa(taxa_sums(kosmos_18S_MP_phyloseq)>0,kosmos_18S_MP_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_18S_MP_phyloseq)
ntaxa <- ntaxa(kosmos_18S_MP_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_18S_MP_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_18S_MP_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unknown", "unassigned", and "no_hit", thus now -4
ngenera <- length(unique(tax_table_all.df$Genus)) - 4
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, thus now -3
nspecies <- length(unique(tax_table_all.df$Species)) - 3
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_18S_MP <- data.frame(statistic_names,statistic_values)
overview_statistics_18S_MP
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 2825
## 3 Number of Reads 753198
## 4 Phyla 41
## 5 Classes 89
## 6 Orders 169
## 7 Families 226
## 8 Genera 241
## 9 Species 233
# Calculate mean + SD
kosmos_18S_MP_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(kosmos_18S_MP_phyloseq))))
kosmos_18S_MP_sample_sums <- rownames_to_column(kosmos_18S_MP_sample_sums,"sample")
colnames(kosmos_18S_MP_sample_sums)[2] <- "reads"
summarySE(kosmos_18S_MP_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 94149.75 19034.32 6729.649 15913.09
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_12S_16_phyloseq)
ntaxa <- ntaxa(kosmos_12S_16_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_12S_16_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_12S_16_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for genus, thus now -3
ngenera <- length(unique(tax_table_all.df$Genus)) - 3
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, thus now -3
nspecies <- length(unique(tax_table_all.df$Species)) - 3
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_12S <- data.frame(statistic_names,statistic_values)
overview_statistics_12S
## statistic_names statistic_values
## 1 Number of Samples 16
## 2 Number of ASVs 445
## 3 Number of Reads 216841
## 4 Phyla 0
## 5 Classes 2
## 6 Orders 21
## 7 Families 30
## 8 Genera 31
## 9 Species 25
phyla_12S <- unique(tax_table_all.df$Phylum)
genera_12S <- unique(tax_table_all.df$Genus)
kosmos_12S_M1_phyloseq <- prune_samples(sample_data(kosmos_12S_16_phyloseq)$SAMPLING_station_number == "M1", kosmos_12S_16_phyloseq)
kosmos_12S_M1_phyloseq <- prune_taxa(taxa_sums(kosmos_12S_M1_phyloseq)>0,kosmos_12S_M1_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_12S_M1_phyloseq)
ntaxa <- ntaxa(kosmos_12S_M1_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_12S_M1_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_12S_M1_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unassigned", "unknown", and "no_hit",
ngenera <- length(unique(tax_table_all.df$Genus)) - 4
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, but there is a value for "bacterium, thus -4
nspecies <- length(unique(tax_table_all.df$Species)) - 4
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_12S_M1 <- data.frame(statistic_names,statistic_values)
overview_statistics_12S_M1
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 348
## 3 Number of Reads 105635
## 4 Phyla -1
## 5 Classes 1
## 6 Orders 15
## 7 Families 21
## 8 Genera 23
## 9 Species 20
# Calculate mean + SD
kosmos_12S_M1_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(kosmos_12S_M1_phyloseq))))
kosmos_12S_M1_sample_sums <- rownames_to_column(kosmos_12S_M1_sample_sums,"sample")
colnames(kosmos_12S_M1_sample_sums)[2] <- "reads"
summarySE(kosmos_12S_M1_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 13204.38 9063.226 3204.334 7577.047
kosmos_12S_MP_phyloseq <- prune_samples(sample_data(kosmos_12S_16_phyloseq)$SAMPLING_station_number == "MP", kosmos_12S_16_phyloseq)
kosmos_12S_MP_phyloseq <- prune_taxa(taxa_sums(kosmos_12S_MP_phyloseq)>0,kosmos_12S_MP_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_12S_MP_phyloseq)
ntaxa <- ntaxa(kosmos_12S_MP_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_12S_MP_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_12S_MP_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unknown", "unassigned", and "no_hit", thus now -4
ngenera <- length(unique(tax_table_all.df$Genus)) - 4
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, but there is a value for "bacterium, thus -4
nspecies <- length(unique(tax_table_all.df$Species)) - 4
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_12S_MP <- data.frame(statistic_names,statistic_values)
overview_statistics_12S_MP
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 246
## 3 Number of Reads 111206
## 4 Phyla 0
## 5 Classes 2
## 6 Orders 20
## 7 Families 26
## 8 Genera 24
## 9 Species 18
# Calculate mean + SD
kosmos_12S_MP_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(kosmos_12S_MP_phyloseq))))
kosmos_12S_MP_sample_sums <- rownames_to_column(kosmos_12S_MP_sample_sums,"sample")
colnames(kosmos_12S_MP_sample_sums)[2] <- "reads"
summarySE(kosmos_12S_MP_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 13900.75 6019.894 2128.354 5032.758
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(chloroplast_16_phyloseq)
ntaxa <- ntaxa(chloroplast_16_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(chloroplast_16_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(chloroplast_16_phyloseq),"matrix"))
# a blank value is considered unique, thus -1 for all levels of taxonomy
nphyla <- length(unique(tax_table_all.df$Phylum)) - 1
nclasses <- length(unique(tax_table_all.df$Class)) - 1
norders <- length(unique(tax_table_all.df$Order)) - 1
nfamilies <- length(unique(tax_table_all.df$Family)) - 1
ngenera <- length(unique(tax_table_all.df$Genus)) - 1
nspecies <- length(unique(tax_table_all.df$Species)) -1
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_chloroplast <- data.frame(statistic_names,statistic_values)
overview_statistics_chloroplast
## statistic_names statistic_values
## 1 Number of Samples 16
## 2 Number of ASVs 258
## 3 Number of Reads 241783
## 4 Phyla 7
## 5 Classes 14
## 6 Orders 16
## 7 Families 15
## 8 Genera 4
## 9 Species 5
chloroplast_phyla <- unique(tax_table_all.df$Phylum)
chloroplast_genera <- unique(tax_table_all.df$Genus)
chloroplast_M1_phyloseq <- prune_samples(sample_data(chloroplast_16_phyloseq)$SAMPLING_station_number == "M1", chloroplast_16_phyloseq)
chloroplast_M1_phyloseq <- prune_taxa(taxa_sums(chloroplast_M1_phyloseq)>0,chloroplast_M1_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(chloroplast_M1_phyloseq)
ntaxa <- ntaxa(chloroplast_M1_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(chloroplast_M1_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(chloroplast_M1_phyloseq),"matrix"))
# a blank value is considered unique, thus -1 for all levels of taxonomy
nphyla <- length(unique(tax_table_all.df$Phylum)) - 1
nclasses <- length(unique(tax_table_all.df$Class)) - 1
norders <- length(unique(tax_table_all.df$Order)) - 1
nfamilies <- length(unique(tax_table_all.df$Family)) - 1
ngenera <- length(unique(tax_table_all.df$Genus)) - 1
nspecies <- length(unique(tax_table_all.df$Species)) -1
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_chloroplast_M1 <- data.frame(statistic_names,statistic_values)
overview_statistics_chloroplast_M1
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 158
## 3 Number of Reads 73050
## 4 Phyla 5
## 5 Classes 13
## 6 Orders 15
## 7 Families 14
## 8 Genera 2
## 9 Species 2
# Calculate mean + SD
kosmos_chloroplast_M1_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(chloroplast_M1_phyloseq))))
kosmos_chloroplast_M1_sample_sums <- rownames_to_column(kosmos_chloroplast_M1_sample_sums,"sample")
colnames(kosmos_chloroplast_M1_sample_sums)[2] <- "reads"
summarySE(kosmos_chloroplast_M1_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 9131.25 6511.553 2302.182 5443.794
chloroplast_MP_phyloseq <- prune_samples(sample_data(chloroplast_16_phyloseq)$SAMPLING_station_number == "MP", chloroplast_16_phyloseq)
chloroplast_MP_phyloseq <- prune_taxa(taxa_sums(chloroplast_MP_phyloseq)>0,chloroplast_MP_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(chloroplast_MP_phyloseq)
ntaxa <- ntaxa(chloroplast_MP_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(chloroplast_MP_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(chloroplast_MP_phyloseq),"matrix"))
# a blank value is considered unique, thus -1 for all levels of taxonomy
nphyla <- length(unique(tax_table_all.df$Phylum)) - 1
nclasses <- length(unique(tax_table_all.df$Class)) - 1
norders <- length(unique(tax_table_all.df$Order)) - 1
nfamilies <- length(unique(tax_table_all.df$Family)) - 1
ngenera <- length(unique(tax_table_all.df$Genus)) - 1
nspecies <- length(unique(tax_table_all.df$Species)) -1
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_chloroplast_MP <- data.frame(statistic_names,statistic_values)
overview_statistics_chloroplast_MP
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 203
## 3 Number of Reads 168733
## 4 Phyla 7
## 5 Classes 13
## 6 Orders 16
## 7 Families 15
## 8 Genera 4
## 9 Species 5
# Calculate mean + SD
kosmos_chloroplast_MP_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(chloroplast_MP_phyloseq))))
kosmos_chloroplast_MP_sample_sums <- rownames_to_column(kosmos_chloroplast_MP_sample_sums,"sample")
colnames(kosmos_chloroplast_MP_sample_sums)[2] <- "reads"
summarySE(kosmos_chloroplast_MP_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 21091.62 10737.76 3796.371 8976.992
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(bacteria_16_phyloseq)
ntaxa <- ntaxa(bacteria_16_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(bacteria_16_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(bacteria_16_phyloseq),"matrix"))
# a blank value is considered unique, thus -1 for all levels of taxonomy
nphyla <- length(unique(tax_table_all.df$Phylum)) - 1
nclasses <- length(unique(tax_table_all.df$Class)) - 1
norders <- length(unique(tax_table_all.df$Order)) - 1
nfamilies <- length(unique(tax_table_all.df$Family)) - 1
ngenera <- length(unique(tax_table_all.df$Genus)) - 1
nspecies <- length(unique(tax_table_all.df$Species)) -1
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_bacteria <- data.frame(statistic_names,statistic_values)
overview_statistics_bacteria
## statistic_names statistic_values
## 1 Number of Samples 16
## 2 Number of ASVs 5786
## 3 Number of Reads 3125836
## 4 Phyla 38
## 5 Classes 74
## 6 Orders 174
## 7 Families 256
## 8 Genera 478
## 9 Species 16
bacteria_phyla <- unique(tax_table_all.df$Phylum)
bacteria_genera <- unique(tax_table_all.df$Genus)
bacteria_M1_phyloseq <- prune_samples(sample_data(bacteria_16_phyloseq)$SAMPLING_station_number == "M1", bacteria_16_phyloseq)
bacteria_M1_phyloseq <- prune_taxa(taxa_sums(bacteria_M1_phyloseq)>0,bacteria_M1_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(bacteria_M1_phyloseq)
ntaxa <- ntaxa(bacteria_M1_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(bacteria_M1_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(bacteria_M1_phyloseq),"matrix"))
# a blank value is considered unique, thus -1 for all levels of taxonomy
nphyla <- length(unique(tax_table_all.df$Phylum)) - 1
nclasses <- length(unique(tax_table_all.df$Class)) - 1
norders <- length(unique(tax_table_all.df$Order)) - 1
nfamilies <- length(unique(tax_table_all.df$Family)) - 1
ngenera <- length(unique(tax_table_all.df$Genus)) - 1
nspecies <- length(unique(tax_table_all.df$Species)) -1
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_bacteria_M1 <- data.frame(statistic_names,statistic_values)
overview_statistics_bacteria_M1
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 3223
## 3 Number of Reads 1624665
## 4 Phyla 24
## 5 Classes 43
## 6 Orders 130
## 7 Families 183
## 8 Genera 348
## 9 Species 10
# Calculate mean + SD
kosmos_bacteria_M1_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(bacteria_M1_phyloseq))))
kosmos_bacteria_M1_sample_sums <- rownames_to_column(kosmos_bacteria_M1_sample_sums,"sample")
colnames(kosmos_bacteria_M1_sample_sums)[2] <- "reads"
summarySE(kosmos_bacteria_M1_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 203083.1 20875.4 7380.568 17452.27
bacteria_MP_phyloseq <- prune_samples(sample_data(bacteria_16_phyloseq)$SAMPLING_station_number == "MP", bacteria_16_phyloseq)
bacteria_MP_phyloseq <- prune_taxa(taxa_sums(bacteria_MP_phyloseq)>0,bacteria_MP_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(bacteria_MP_phyloseq)
ntaxa <- ntaxa(bacteria_MP_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(bacteria_MP_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(bacteria_MP_phyloseq),"matrix"))
# a blank value is considered unique, thus -1 for all levels of taxonomy
nphyla <- length(unique(tax_table_all.df$Phylum)) - 1
nclasses <- length(unique(tax_table_all.df$Class)) - 1
norders <- length(unique(tax_table_all.df$Order)) - 1
nfamilies <- length(unique(tax_table_all.df$Family)) - 1
ngenera <- length(unique(tax_table_all.df$Genus)) - 1
nspecies <- length(unique(tax_table_all.df$Species)) -1
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_bacteria_MP <- data.frame(statistic_names,statistic_values)
overview_statistics_bacteria_MP
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 4139
## 3 Number of Reads 1501171
## 4 Phyla 38
## 5 Classes 72
## 6 Orders 166
## 7 Families 242
## 8 Genera 414
## 9 Species 8
# Calculate mean + SD
kosmos_bacteria_MP_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(bacteria_MP_phyloseq))))
kosmos_bacteria_MP_sample_sums <- rownames_to_column(kosmos_bacteria_MP_sample_sums,"sample")
colnames(kosmos_bacteria_MP_sample_sums)[2] <- "reads"
summarySE(kosmos_bacteria_MP_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 187646.4 36331.09 12844.98 30373.55
COI_phyla <- as.character(COI_phyla)
phyla_18S <- as.character(phyla_18S)
phyla_12S <- as.character(phyla_12S)
chloroplast_phyla <- as.character(chloroplast_phyla)
bacteria_phyla <- as.character(bacteria_phyla)
# don't count 12S for now, unless we decide to include them in the paper
all_phyla <- c(COI_phyla, phyla_12S, phyla_18S, bacteria_phyla, chloroplast_phyla)
length(unique(all_phyla)) -4
## [1] 86
# unknown, blank, no_hit, and unassigned are all in here, thus -4
sort(unique(all_phyla))
## [1] "" "Acidobacteria"
## [3] "Actinobacteria" "Annelida"
## [5] "Apicomplexa" "Arthropoda"
## [7] "Ascomycota" "Bacillariophyta"
## [9] "Bacteroidetes" "Basidiomycota"
## [11] "Blastocladiomycota" "Brachiopoda"
## [13] "Bryozoa" "Calditrichaeota"
## [15] "Cercozoa" "Chaetognatha"
## [17] "Chloroflexi" "Chlorophyta"
## [19] "Chordata" "Chytridiomycota"
## [21] "Ciliophora" "Cloacimonetes"
## [23] "Cnidaria" "Cryptophyta"
## [25] "Ctenophora" "Cyanobacteria"
## [27] "Dadabacteria" "Deferribacteres"
## [29] "Deinococcus-Thermus" "Dependentiae"
## [31] "Discoba" "Discosea"
## [33] "Echinodermata" "Epsilonbacteraeota"
## [35] "Euglenida" "Euglenozoa"
## [37] "Fibrobacteres" "Firmicutes"
## [39] "Foraminifera" "Fusobacteria"
## [41] "Gastrotricha" "Gemmatimonadetes"
## [43] "Haptista" "Haptophyta"
## [45] "Hemichordata" "Heterolobosea"
## [47] "Hydrogenedentes" "Imbricatea"
## [49] "Kiritimatiellaeota" "Latescibacteria"
## [51] "Lentisphaerae" "Margulisbacteria"
## [53] "Marinimicrobia_(SAR406_clade)" "Microsporidia"
## [55] "Mollusca" "Mucoromycota"
## [57] "Nematoda" "Nemertea"
## [59] "Nitrospinae" "no_hit"
## [61] "Ochrophyta" "Omnitrophicaeota"
## [63] "Onychophora" "Patescibacteria"
## [65] "Phoronida" "Picozoa"
## [67] "Placozoa" "Planctomycetes"
## [69] "Platyhelminthes" "Poribacteria"
## [71] "Porifera" "Proteobacteria"
## [73] "Rappemonad" "Rhodophyta"
## [75] "Rokubacteria" "Rotifera"
## [77] "RsaHF231" "Spirochaetes"
## [79] "Streptophyta" "Synergistetes"
## [81] "Tenericutes" "Thermotogae"
## [83] "Tubulinea" "unassigned"
## [85] "unknown" "Verrucomicrobia"
## [87] "WOR-1" "WPS-2"
## [89] "Zixibacteria" "Zoopagomycota"
COI_genera <- as.character(COI_genera)
genera_18S <- as.character(genera_18S)
genera_12S <- as.character(genera_12S)
chloroplast_genera <- as.character(chloroplast_genera)
bacteria_genera <- as.character(bacteria_genera)
# don't count 12S for now, unless we decide to include them in the paper
all_genera <- c(COI_genera, genera_12S, genera_18S, bacteria_genera, chloroplast_genera)
# unknown, blank, no_hit, g_, and unassigned are all in here, thus -5
# Control 1 = MilliQ, Control 2 = RO
kosmos_COI_field_blanks <- prune_samples(sample_names(kosmos_COI_phyloseq) %in% c("KOSMOSD3_MQCBa_X","KOSMOSD3_ROCBa_X"),kosmos_COI_phyloseq)
kosmos_COI_field_blanks <- prune_taxa(taxa_sums(kosmos_COI_field_blanks) > 0, kosmos_COI_field_blanks)
kosmos_18S_field_blanks <- prune_samples(sample_names(kosmos_18S_phyloseq) %in% c("KOSMOSD3_MQCBa_UU","KOSMOSD3_ROCBa_UU"),kosmos_18S_phyloseq)
kosmos_18S_field_blanks <- prune_taxa(taxa_sums(kosmos_18S_field_blanks) > 0, kosmos_18S_field_blanks)
kosmos_12S_field_blanks <- prune_samples(sample_names(kosmos_12S_phyloseq) %in% c("KOSMOSD3_MQCBa_TT","KOSMOSD3_ROCBa_TT"),kosmos_12S_phyloseq)
kosmos_12S_field_blanks <- prune_taxa(taxa_sums(kosmos_12S_field_blanks) > 0, kosmos_12S_field_blanks)
kosmos_bacteria_field_blanks <- prune_samples(sample_names(bacteria_phyloseq) %in% c("KOSMOS.D3.control1","KOSMOS.D3.control2"),bacteria_phyloseq)
kosmos_bacteria_field_blanks <- prune_taxa(taxa_sums(kosmos_bacteria_field_blanks) > 0, kosmos_bacteria_field_blanks)
kosmos_chloroplast_field_blanks <- prune_samples(sample_names(chloroplast_phyloseq) %in% c("KOSMOS.D3.control1","KOSMOS.D3.control2"),chloroplast_phyloseq)
kosmos_chloroplast_field_blanks <- prune_taxa(taxa_sums(kosmos_chloroplast_field_blanks) > 0, kosmos_chloroplast_field_blanks)
kosmos_COI_field_blanks_class <- tax_glom(kosmos_COI_field_blanks,taxrank=rank_names(kosmos_COI_field_blanks)[3], NArm = FALSE)
kosmos_18S_field_blanks_class <- tax_glom(kosmos_18S_field_blanks,taxrank=rank_names(kosmos_18S_field_blanks)[3], NArm = FALSE)
kosmos_12S_field_blanks_class <- tax_glom(kosmos_12S_field_blanks,taxrank=rank_names(kosmos_12S_field_blanks)[3], NArm = FALSE)
kosmos_bacteria_field_blanks_class <- tax_glom(kosmos_bacteria_field_blanks,taxrank=rank_names(kosmos_bacteria_field_blanks)[3], NArm = FALSE)
kosmos_chloroplast_field_blanks_class <- tax_glom(kosmos_chloroplast_field_blanks,taxrank=rank_names(kosmos_chloroplast_field_blanks)[3], NArm = FALSE)
# taxa_sums(kosmos_COI_field_blanks)
# taxa_sums(kosmos_18S_field_blanks)
# taxa_sums(kosmos_12S_field_blanks)
# taxa_sums(kosmos_bacteria_field_blanks_class)
# taxa_sums(kosmos_chloroplast_field_blanks_class)
# Create a table that documents ASVs found in field blanks and their abundance vs their abundance in the environmental samples
kosmos_COI_field_blanks_tax_table <- as.data.frame(as(tax_table(kosmos_COI_field_blanks),"matrix"))
kosmos_COI_field_blanks_tax_table <- rownames_to_column(kosmos_COI_field_blanks_tax_table,"ASV")
kosmos_COI_field_blanks_tax_table %>% mutate_all(as.character) -> kosmos_COI_field_blanks_tax_table
kosmos_COI_field_blanks_asv_table <- as.data.frame(otu_table(kosmos_COI_field_blanks))
kosmos_COI_field_blanks_asv_table <- rownames_to_column(kosmos_COI_field_blanks_asv_table, "ASV")
# Calculate the average proportion of each ASV in the field blanks
kosmos_COI_field_blanks_asv_table %>% mutate(., field_blank_mean_perc = (KOSMOSD3_MQCBa_X + KOSMOSD3_ROCBa_X)/(sum(kosmos_COI_field_blanks_asv_table$KOSMOSD3_MQCBa_X) + sum(kosmos_COI_field_blanks_asv_table$KOSMOSD3_ROCBa_X))) -> kosmos_COI_field_blanks_asv_table
# Calculate the average proportion of each ASV in the environmental samples
kosmos_COI_16_asv_table <- as.data.frame(otu_table(kosmos_COI_16_phyloseq))
kosmos_COI_16_asv_table <- rownames_to_column(kosmos_COI_16_asv_table, "ASV")
kosmos_COI_16_asv_table %>% mutate(., envt_mean_perc = (rowSums(.[,2:17]))/(sum(colSums(.[,2:17])))) -> kosmos_COI_16_asv_table
kosmos_COI_envt_means <- kosmos_COI_16_asv_table[,c("ASV","envt_mean_perc")]
kosmos_COI_field_blanks_v_envt <- left_join(kosmos_COI_field_blanks_asv_table,kosmos_COI_envt_means, by = "ASV")
kosmos_COI_field_blanks_v_envt <- kosmos_COI_field_blanks_v_envt[order(-kosmos_COI_field_blanks_v_envt$field_blank_mean_perc),]
kosmos_COI_field_blanks_v_envt <- left_join(kosmos_COI_field_blanks_v_envt, kosmos_COI_field_blanks_tax_table, by = "ASV")
write.csv(kosmos_COI_field_blanks_v_envt, here::here("figures", "field_blanks", "COI_field_blank_composition.csv"))
kosmos_18S_field_blanks_tax_table <- as.data.frame(as(tax_table(kosmos_18S_field_blanks),"matrix"))
kosmos_18S_field_blanks_tax_table <- rownames_to_column(kosmos_18S_field_blanks_tax_table,"ASV")
kosmos_18S_field_blanks_tax_table %>% mutate_all(as.character) -> kosmos_18S_field_blanks_tax_table
kosmos_18S_field_blanks_asv_table <- as.data.frame(otu_table(kosmos_18S_field_blanks))
kosmos_18S_field_blanks_asv_table <- rownames_to_column(kosmos_18S_field_blanks_asv_table, "ASV")
# Calculate the average proportion of each ASV in the field blanks
kosmos_18S_field_blanks_asv_table %>% mutate(., field_blank_mean_perc = (KOSMOSD3_MQCBa_UU + KOSMOSD3_ROCBa_UU)/(sum(kosmos_18S_field_blanks_asv_table$KOSMOSD3_MQCBa_UU) + sum(kosmos_18S_field_blanks_asv_table$KOSMOSD3_ROCBa_UU))) -> kosmos_18S_field_blanks_asv_table
# Calculate the average proportion of each ASV in the environmental samples
kosmos_18S_16_asv_table <- as.data.frame(otu_table(kosmos_18S_16_phyloseq))
kosmos_18S_16_asv_table <- rownames_to_column(kosmos_18S_16_asv_table, "ASV")
kosmos_18S_16_asv_table %>% mutate(., envt_mean_perc = (rowSums(.[,2:17]))/(sum(colSums(.[,2:17])))) -> kosmos_18S_16_asv_table
kosmos_18S_envt_means <- kosmos_18S_16_asv_table[,c("ASV","envt_mean_perc")]
kosmos_18S_field_blanks_v_envt <- left_join(kosmos_18S_field_blanks_asv_table,kosmos_18S_envt_means, by = "ASV")
kosmos_18S_field_blanks_v_envt <- kosmos_18S_field_blanks_v_envt[order(-kosmos_18S_field_blanks_v_envt$field_blank_mean_perc),]
kosmos_18S_field_blanks_v_envt <- left_join(kosmos_18S_field_blanks_v_envt, kosmos_18S_field_blanks_tax_table, by = "ASV")
write.csv(kosmos_18S_field_blanks_v_envt, here::here("figures", "field_blanks", "18S_field_blank_composition.csv"))
kosmos_12S_field_blanks_tax_table <- as.data.frame(as(tax_table(kosmos_12S_field_blanks),"matrix"))
kosmos_12S_field_blanks_tax_table <- rownames_to_column(kosmos_12S_field_blanks_tax_table,"ASV")
kosmos_12S_field_blanks_tax_table %>% mutate_all(as.character) -> kosmos_12S_field_blanks_tax_table
kosmos_12S_field_blanks_asv_table <- as.data.frame(otu_table(kosmos_12S_field_blanks))
kosmos_12S_field_blanks_asv_table <- rownames_to_column(kosmos_12S_field_blanks_asv_table, "ASV")
# Calculate the average proportion of each ASV in the field blanks
kosmos_12S_field_blanks_asv_table %>% mutate(., field_blank_mean_perc = (KOSMOSD3_MQCBa_TT + KOSMOSD3_ROCBa_TT)/(sum(kosmos_12S_field_blanks_asv_table$KOSMOSD3_MQCBa_TT) + sum(kosmos_12S_field_blanks_asv_table$KOSMOSD3_ROCBa_TT))) -> kosmos_12S_field_blanks_asv_table
# Calculate the average proportion of each ASV in the environmental samples
kosmos_12S_16_asv_table <- as.data.frame(otu_table(kosmos_12S_16_phyloseq))
kosmos_12S_16_asv_table <- rownames_to_column(kosmos_12S_16_asv_table, "ASV")
kosmos_12S_16_asv_table %>% mutate(., envt_mean_perc = (rowSums(.[,2:17]))/(sum(colSums(.[,2:17])))) -> kosmos_12S_16_asv_table
kosmos_12S_envt_means <- kosmos_12S_16_asv_table[,c("ASV","envt_mean_perc")]
kosmos_12S_field_blanks_v_envt <- left_join(kosmos_12S_field_blanks_asv_table,kosmos_12S_envt_means, by = "ASV")
kosmos_12S_field_blanks_v_envt <- kosmos_12S_field_blanks_v_envt[order(-kosmos_12S_field_blanks_v_envt$field_blank_mean_perc),]
kosmos_12S_field_blanks_v_envt <- left_join(kosmos_12S_field_blanks_v_envt, kosmos_12S_field_blanks_tax_table, by = "ASV")
write.csv(kosmos_12S_field_blanks_v_envt, here::here("figures", "field_blanks", "12S_field_blank_composition.csv"))
kosmos_bacteria_field_blanks_tax_table <- as.data.frame(as(tax_table(kosmos_bacteria_field_blanks),"matrix"))
kosmos_bacteria_field_blanks_tax_table <- rownames_to_column(kosmos_bacteria_field_blanks_tax_table,"ASV")
kosmos_bacteria_field_blanks_tax_table %>% mutate_all(as.character) -> kosmos_bacteria_field_blanks_tax_table
kosmos_bacteria_field_blanks_asv_table <- as.data.frame(otu_table(kosmos_bacteria_field_blanks))
kosmos_bacteria_field_blanks_asv_table <- rownames_to_column(kosmos_bacteria_field_blanks_asv_table, "ASV")
# Calculate the average proportion of each ASV in the field blanks
kosmos_bacteria_field_blanks_asv_table %>% mutate(., field_blank_mean_perc = (KOSMOS.D3.control1 + KOSMOS.D3.control2)/(sum(kosmos_bacteria_field_blanks_asv_table$KOSMOS.D3.control1) + sum(kosmos_bacteria_field_blanks_asv_table$KOSMOS.D3.control2))) -> kosmos_bacteria_field_blanks_asv_table
# Calculate the average proportion of each ASV in the environmental samples
kosmos_bacteria_16_asv_table <- as.data.frame(otu_table(bacteria_16_phyloseq))
kosmos_bacteria_16_asv_table <- rownames_to_column(kosmos_bacteria_16_asv_table, "ASV")
kosmos_bacteria_16_asv_table %>% mutate(., envt_mean_perc = (rowSums(.[,2:17]))/(sum(colSums(.[,2:17])))) -> kosmos_bacteria_16_asv_table
kosmos_bacteria_envt_means <- kosmos_bacteria_16_asv_table[,c("ASV","envt_mean_perc")]
kosmos_bacteria_field_blanks_v_envt <- left_join(kosmos_bacteria_field_blanks_asv_table,kosmos_bacteria_envt_means, by = "ASV")
kosmos_bacteria_field_blanks_v_envt <- kosmos_bacteria_field_blanks_v_envt[order(-kosmos_bacteria_field_blanks_v_envt$field_blank_mean_perc),]
kosmos_bacteria_field_blanks_v_envt <- left_join(kosmos_bacteria_field_blanks_v_envt, kosmos_bacteria_field_blanks_tax_table, by = "ASV")
write.csv(kosmos_bacteria_field_blanks_v_envt, here::here("figures", "field_blanks", "bacteria_field_blank_composition.csv"))
kosmos_chloroplast_field_blanks_tax_table <- as.data.frame(as(tax_table(kosmos_chloroplast_field_blanks),"matrix"))
kosmos_chloroplast_field_blanks_tax_table <- rownames_to_column(kosmos_chloroplast_field_blanks_tax_table,"ASV")
kosmos_chloroplast_field_blanks_tax_table %>% mutate_all(as.character) -> kosmos_chloroplast_field_blanks_tax_table
kosmos_chloroplast_field_blanks_asv_table <- as.data.frame(otu_table(kosmos_chloroplast_field_blanks))
kosmos_chloroplast_field_blanks_asv_table <- rownames_to_column(kosmos_chloroplast_field_blanks_asv_table, "ASV")
# Calculate the average proportion of each ASV in the field blanks
kosmos_chloroplast_field_blanks_asv_table %>% mutate(., field_blank_mean_perc = (KOSMOS.D3.control1 + KOSMOS.D3.control2)/(sum(kosmos_chloroplast_field_blanks_asv_table$KOSMOS.D3.control1) + sum(kosmos_chloroplast_field_blanks_asv_table$KOSMOS.D3.control2))) -> kosmos_chloroplast_field_blanks_asv_table
# Calculate the average proportion of each ASV in the environmental samples
kosmos_chloroplast_16_asv_table <- as.data.frame(otu_table(chloroplast_16_phyloseq))
kosmos_chloroplast_16_asv_table <- rownames_to_column(kosmos_chloroplast_16_asv_table, "ASV")
kosmos_chloroplast_16_asv_table %>% mutate(., envt_mean_perc = (rowSums(.[,2:17]))/(sum(colSums(.[,2:17])))) -> kosmos_chloroplast_16_asv_table
kosmos_chloroplast_envt_means <- kosmos_chloroplast_16_asv_table[,c("ASV","envt_mean_perc")]
kosmos_chloroplast_field_blanks_v_envt <- left_join(kosmos_chloroplast_field_blanks_asv_table,kosmos_chloroplast_envt_means, by = "ASV")
kosmos_chloroplast_field_blanks_v_envt <- kosmos_chloroplast_field_blanks_v_envt[order(-kosmos_chloroplast_field_blanks_v_envt$field_blank_mean_perc),]
kosmos_chloroplast_field_blanks_v_envt <- left_join(kosmos_chloroplast_field_blanks_v_envt, kosmos_chloroplast_field_blanks_tax_table, by = "ASV")
write.csv(kosmos_chloroplast_field_blanks_v_envt, here::here("figures", "field_blanks", "chloroplast_field_blank_composition.csv"))
Note When running the DEICODE PCA, we are manually flipping the sign of our ordination results so that they all align nicely for the plot.
legend_df <- data.frame(c("Pacific","Mesocosm 1 (M1)"),c(1,1),c(0.78,0.975))
colnames(legend_df) <- c("station","position_x","position_y")
legend_gg <- ggplot(data = legend_df, aes(x = position_x, y = position_y, color = station, shape = station))+
geom_point(size = 14)+
geom_text(data = subset(legend_df,station == "Pacific"), aes(x = position_x+0.2, y = position_y+0.0175, color = station,label = station),hjust = 0, size = 18)+
geom_text(data = subset(legend_df,station == "Mesocosm 1 (M1)"), aes(x = position_x+0.2, y = position_y+0.01, color = station,label = station),hjust = 0, size = 18)+
scale_color_manual(name = "station", values = c("#e41a1c","#377eb8"), labels = c("Mesocosm 1", "Pacific"))+
scale_shape_manual(name = "station", values = c(19,17), labels = c("Mesocosm 1", "Pacific"))+
theme(panel.background = element_rect(fill="white"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
legend.position = "none",
plot.margin = margin(0, 0, 0, 0, "cm"),)+
ylim(0.7,1.1)+
xlim(0.9,3.5)
# annotate("text",label = "Mesocosm 1 (M1)", x = 31.5, y = 20.55,size = 6,adj = 0)+ #mesocosm
# annotate("text",label = "Pacific", x = 31.5, y = 20.35, size = 6, adj = 0)+ #pacificß
legend_gg
ggsave(here::here("figures", "PCA_figure", "PCA_legend_v2.png"),legend_gg, width = 7, height = 2)
kosmos_COI_16_asv_table <- as.data.frame(otu_table(kosmos_COI_16_phyloseq))
kosmos_COI_16_asv_table <- rownames_to_column(kosmos_COI_16_asv_table,"#OTUID")
write.table(kosmos_COI_16_asv_table,here::here("local_deicode_runs", "COI", "kosmos_COI_16_asv_table_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_COI_16_tax_table <- as.data.frame(as(tax_table(kosmos_COI_16_phyloseq),"matrix"))
kosmos_COI_16_tax_table <- rownames_to_column(kosmos_COI_16_tax_table,"#OTUID")
write.table(kosmos_COI_16_tax_table,here::here("local_deicode_runs", "COI", "kosmos_COI_16_tax_table_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_COI_16samples_metadata <- as.data.frame(as.matrix(sample_data(kosmos_COI_16_phyloseq)))
kosmos_COI_16samples_metadata <- rownames_to_column(kosmos_COI_16samples_metadata,"#SampleID")
kosmos_COI_16samples_metadata <- kosmos_COI_16samples_metadata[,!(names(kosmos_COI_16samples_metadata) %in% c("sample_name"))]
# Add a sample name column
sample <- c("M15","P15","M1","P1","M24","P24","M32","P32","M36","P36","M42","P42","M48","P48","M8","P8")
kosmos_COI_16samples_metadata["sample"] <- sample
# Add a day of experiment column
day_of_experiment <- c(15,15,1,1,24,24,32,32,36,36,42,42,48,48,8,8)
kosmos_COI_16samples_metadata["day_of_experiment"] <- day_of_experiment
# Add a column for group (M1 after OMZ water addition, or Pacific + first two M1 samples)
kosmos_COI_16samples_metadata %>% mutate(.,group = ifelse(sample %in% c("M15","M24","M32","M36","M42","M48"),"M1_postOMZ","P_all_M1_preOMZ")) -> kosmos_COI_16samples_metadata
# kosmos_COI_16samples_metadata %>% mutate(.,SAMPLING_day = regmatches(kosmos_COI_16samples_metadata$sample, gregexpr("[[:digit:]]+", kosmos_COI_16samples_metadata$sample))) -> kosmos_COI_16samples_metadata
# metadata_12S <- metadata_12S %>%
# dplyr::select("X.SampleID", everything())
# metadata_12S %>% dplyr::rename("#SampleID" = X.SampleID) -> metadata_12S
write.table(kosmos_COI_16samples_metadata,here::here("local_deicode_runs", "COI", "kosmos_COI_16_sample_data_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
pco <- read_qza(here::here("local_deicode_runs", "COI", "ordination.qza"))
label.PC1 <- paste0("PC1 (", round(pco$data$ProportionExplained$PC1, 3)*100,"%)")
label.PC1
## [1] "PC1 (56.2%)"
label.PC2 <- paste0("PC2 (", round(pco$data$ProportionExplained$PC2, 3)*100,"%)")
label.PC2
## [1] "PC2 (42.9%)"
label.PC3 <- paste0("PC3 (", round(pco$data$ProportionExplained$PC3, 3)*100,"%)")
label.PC3
## [1] "PC3 (0.9%)"
## Prepare PCA data for ggplot
pca_metadata <- kosmos_COI_16samples_metadata
pca_metadata %>% dplyr::rename(., "SampleID" = "#SampleID") -> pca_metadata
#pca_metadata$depth_cat <- factor(pca_metadata$depth_cat,levels = c("0-5","6-20","20-29","30-39","40-59","60-80","100-199","200-250","280-500","600-700"))
pca_data <- pco$data$Vectors
# Manually change sign of PC1
pca_data$PC1 <- pca_data$PC1*-1
#pca_data <- left_join(pca_data,pca_metadata,on = "SampleID")
pca_data <- right_join(pca_data,pca_metadata,on = "SampleID")
## Joining, by = "SampleID"
pca_data <- subset(pca_data, !(is.na(pca_data$PC1)))
## Plot PCA in ggplot
pca_gg_sample <- ggplot(pca_data,aes(x=PC1,y=PC2,color = SAMPLING_station_number,shape = SAMPLING_station_number, label = day_of_experiment))+
geom_point(size = 6)+
# Subset some samples to be above right
geom_text(data = subset(pca_data,sample %in% c("P32","P15","P8","P1","M24","M32")),aes(x=PC1+0.007,y=PC2+0.05,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be below left
geom_text(data = subset(pca_data,sample %in% c("P42","M1","M48","M42","M36")),aes(x=PC1-0.007,y=PC2-0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# Subset some samples to be below right
geom_text(data = subset(pca_data,sample %in% c("M8","P48")),aes(x=PC1+0.009,y=PC2-0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be above left
geom_text(data = subset(pca_data,sample %in% c("P24","M15","P36")),aes(x=PC1-0.007,y=PC2+0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# geom_text(hjust = 1, vjust=-0.5, segment.alpha = 0, show_guide = FALSE)+
xlab(print(label.PC1))+
ylab(print(label.PC2))+
scale_color_manual(name = "Station", values = c("#e41a1c","#377eb8"), labels = c("Mesocosm 1", "Pacific"))+
scale_shape_manual(name = "Station", values = c(19,17), labels = c("Mesocosm 1", "Pacific"))+
annotate("text", x=Inf, y = Inf, label = "(a)", vjust=1.5, hjust=1.3,size = 12)+
xlim(NA,max(pca_data$PC1)+0.03)+
theme(panel.background = element_rect(fill = "white",color = "black",size = 1),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.title = element_text(size = 20,face = "bold"),
axis.text = element_blank(),
axis.ticks.length=unit(0, "cm"),
plot.margin = margin(0.25, 0, 0.25, 0.25, "cm"),
legend.position = "none",
plot.title = element_text(size = 50, face = "bold", hjust = 0.5))+
ggtitle("COI")
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## [1] "PC1 (56.2%)"
## [1] "PC2 (42.9%)"
pca_gg_sample
ggsave(here::here("figures", "PCA_figure", "COI_pca_kosmos_3PCs_16samples_9.25.20.png"),pca_gg_sample,width = 7,height = 6)
##### PERMANOVA #####
# Extract metadata
pca_metadata <- pca_data
# pca_metadata %>% dplyr::rename(., "SampleID" = "#SampleID") -> pca_metadata
# Create a new column to compare M1 post OMZ water addition with other samples
pca_metadata %>%
mutate(permanova_group = ifelse(SAMPLING_station == "Pacific" | SAMPLING_station == "Mesocosm 1" & sample == "M1" | SAMPLING_station == "Mesocosm 1" & sample == "M8",
"Pacific & M1 pre-OMZ", "M1 post-OMZ")) -> pca_metadata
## Load COI Data
distance <- read_qza(here::here("local_deicode_runs", "COI", "distance.qza"))
# Extract distance matrix
distance_matrix <- distance$data
# convert DEICODE matrix to "dist" class object
PCA_dist <- as.dist(distance_matrix)
##----Run individual PERMANOVAs-------------------------------------------------------------
# PERMANOVA for CTD vs. ESP
set.seed(123)
M1_v_Pacific_permanova_COI <- adonis2(PCA_dist ~ permanova_group, data = pca_metadata, permutations=999)
M1_v_Pacific_permanova_COI
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = PCA_dist ~ permanova_group, data = pca_metadata, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## permanova_group 1 14.769 0.45689 11.778 0.001 ***
## Residual 14 17.556 0.54311
## Total 15 32.325 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
kosmos_18S_16_asv_table <- as.data.frame(otu_table(kosmos_18S_16_phyloseq))
kosmos_18S_16_asv_table <- rownames_to_column(kosmos_18S_16_asv_table,"#OTUID")
write.table(kosmos_18S_16_asv_table,here::here("local_deicode_runs", "18S", "kosmos_18S_16_asv_table_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_18S_16_tax_table <- as.data.frame(as(tax_table(kosmos_18S_16_phyloseq),"matrix"))
kosmos_18S_16_tax_table <- rownames_to_column(kosmos_18S_16_tax_table,"#OTUID")
write.table(kosmos_18S_16_tax_table,here::here("local_deicode_runs", "18S", "kosmos_18S_16_tax_table_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_18S_16samples_metadata <- as.data.frame(as.matrix(sample_data(kosmos_18S_16_phyloseq)))
kosmos_18S_16samples_metadata <- rownames_to_column(kosmos_18S_16samples_metadata,"#SampleID")
kosmos_18S_16samples_metadata <- kosmos_18S_16samples_metadata[,!(names(kosmos_18S_16samples_metadata) %in% c("sample_name"))]
# Add a sample name column
sample <- c("M15","P15","M1","P1","M24","P24","M32","P32","M36","P36","M42","P42","M48","P48","M8","P8")
kosmos_18S_16samples_metadata["sample"] <- sample
# Add a day of experiment column
day_of_experiment <- c(15,15,1,1,24,24,32,32,36,36,42,42,48,48,8,8)
kosmos_18S_16samples_metadata["day_of_experiment"] <- day_of_experiment
# Add a column for group (M1 after OMZ water addition, or Pacific + first two M1 samples)
kosmos_18S_16samples_metadata %>% mutate(.,group = ifelse(sample %in% c("M15","M24","M32","M36","M42","M48"),"M1_postOMZ","P_all_M1_preOMZ")) -> kosmos_18S_16samples_metadata
# kosmos_18S_16samples_metadata %>% mutate(.,SAMPLING_day = regmatches(kosmos_18S_16samples_metadata$sample, gregexpr("[[:digit:]]+", kosmos_18S_16samples_metadata$sample))) -> kosmos_18S_16samples_metadata
# metadata_12S <- metadata_12S %>%
# dplyr::select("X.SampleID", everything())
# metadata_12S %>% dplyr::rename("#SampleID" = X.SampleID) -> metadata_12S
write.table(kosmos_18S_16samples_metadata,here::here("local_deicode_runs", "18S", "kosmos_18S_16_sample_data_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
pco <- read_qza(here::here("local_deicode_runs", "18S", "ordination.qza"))
label.PC1 <- paste0("PC1 (", round(pco$data$ProportionExplained$PC1, 3)*100,"%)")
label.PC1
## [1] "PC1 (60.8%)"
label.PC2 <- paste0("PC2 (", round(pco$data$ProportionExplained$PC2, 3)*100,"%)")
label.PC2
## [1] "PC2 (37.3%)"
label.PC3 <- paste0("PC3 (", round(pco$data$ProportionExplained$PC3, 3)*100,"%)")
label.PC3
## [1] "PC3 (2%)"
## Prepare PCA data for ggplot
pca_metadata <- kosmos_18S_16samples_metadata
pca_metadata %>% dplyr::rename(., "SampleID" = "#SampleID") -> pca_metadata
#pca_metadata$depth_cat <- factor(pca_metadata$depth_cat,levels = c("0-5","6-20","20-29","30-39","40-59","60-80","100-199","200-250","280-500","600-700"))
pca_data <- pco$data$Vectors
# Manually change signs of PC1 and PC2
pca_data$PC1 <- pca_data$PC1*-1
pca_data$PC2 <- pca_data$PC2*-1
#pca_data <- left_join(pca_data,pca_metadata,on = "SampleID")
pca_data <- right_join(pca_data,pca_metadata,on = "SampleID")
## Joining, by = "SampleID"
pca_data <- subset(pca_data, !(is.na(pca_data$PC1)))
## Plot PCA in ggplot
pca_gg_sample <- ggplot(pca_data,aes(x=PC1,y=PC2,color = SAMPLING_station_number,shape = SAMPLING_station_number, label = day_of_experiment))+
geom_point(size = 6)+
# Subset some samples to be above right
geom_text(data = subset(pca_data,sample %in% c("M48","P24", "M8", "P32","P8")),aes(x=PC1+0.007,y=PC2+0.05,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be below left
geom_text(data = subset(pca_data,sample %in% c()),aes(x=PC1-0.007,y=PC2-0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# Subset some samples to be below right
geom_text(data = subset(pca_data,sample %in% c("M1","P1","P15","P48","P36","M36","M32")),aes(x=PC1+0.009,y=PC2-0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be above left
geom_text(data = subset(pca_data,sample %in% c("M42","P42","M15","M24")),aes(x=PC1-0.007,y=PC2+0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# geom_text(hjust = 1, vjust=-0.5, segment.alpha = 0, show_guide = FALSE)+
xlab(print(label.PC1))+
ylab(print(label.PC2))+
scale_color_manual(name = "Station", values = c("#e41a1c","#377eb8"), labels = c("Mesocosm 1", "Pacific"))+
scale_shape_manual(name = "Station", values = c(19,17), labels = c("Mesocosm 1", "Pacific"))+
annotate("text", x=Inf, y = Inf, label = "(b)", vjust=1.5, hjust=1.3,size = 12)+
theme(panel.background = element_rect(fill = "white",color = "black",size = 1),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.title = element_text(size = 20,face = "bold"),
axis.text = element_blank(),
axis.ticks.length=unit(0, "cm"),
plot.margin = margin(0.25, 0, 0.25, 0.25, "cm"),
legend.position = "none",
plot.title = element_text(size = 50, face = "bold", hjust = 0.5))+
ggtitle("18S rRNA")+
xlim(NA,max(pca_data$PC1)+0.03)
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## [1] "PC1 (60.8%)"
## [1] "PC2 (37.3%)"
pca_gg_sample
ggsave(here::here("figures", "PCA_figure", "18S_pca_kosmos_3PCs_16samples_9.25.20.png"),pca_gg_sample,width = 7,height = 6)
##### PERMANOVA #####
# Extract metadata
pca_metadata <- pca_data
# pca_metadata %>% dplyr::rename(., "SampleID" = "#SampleID") -> pca_metadata
# Create a new column to compare M1 post OMZ water addition with other samples
pca_metadata %>%
mutate(permanova_group = ifelse(SAMPLING_station == "Pacific" | SAMPLING_station == "Mesocosm 1" & sample == "M1" | SAMPLING_station == "Mesocosm 1" & sample == "M8",
"Pacific & M1 pre-OMZ", "M1 post-OMZ")) -> pca_metadata
## Load COI Data
distance <- read_qza(here::here("local_deicode_runs", "18S", "distance.qza"))
# Extract distance matrix
distance_matrix <- distance$data
# convert DEICODE matrix to "dist" class object
PCA_dist <- as.dist(distance_matrix)
##----Run individual PERMANOVAs-------------------------------------------------------------
# PERMANOVA for CTD vs. ESP
set.seed(123)
M1_v_Pacific_permanova_18S <- adonis2(PCA_dist ~ permanova_group, data = pca_metadata, permutations=999)
M1_v_Pacific_permanova_18S
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = PCA_dist ~ permanova_group, data = pca_metadata, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## permanova_group 1 14.766 0.45661 11.764 0.001 ***
## Residual 14 17.573 0.54339
## Total 15 32.339 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
kosmos_12S_16_asv_table <- as.data.frame(otu_table(kosmos_12S_16_phyloseq))
kosmos_12S_16_asv_table <- rownames_to_column(kosmos_12S_16_asv_table,"#OTUID")
write.table(kosmos_12S_16_asv_table,here::here("local_deicode_runs", "12S", "kosmos_12S_16_asv_table_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_12S_16_tax_table <- as.data.frame(as(tax_table(kosmos_12S_16_phyloseq),"matrix"))
kosmos_12S_16_tax_table <- rownames_to_column(kosmos_12S_16_tax_table,"#OTUID")
write.table(kosmos_12S_16_tax_table,here::here("local_deicode_runs", "12S", "kosmos_12S_16_tax_table_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_12S_16samples_metadata <- as.data.frame(as.matrix(sample_data(kosmos_12S_16_phyloseq)))
kosmos_12S_16samples_metadata <- rownames_to_column(kosmos_12S_16samples_metadata,"#SampleID")
kosmos_12S_16samples_metadata <- kosmos_12S_16samples_metadata[,!(names(kosmos_12S_16samples_metadata) %in% c("sample_name"))]
# Add a sample name column
sample <- c("M15","P15","M1","P1","M24","P24","M32","P32","M36","P36","M42","P42","M48","P48","M8","P8")
kosmos_12S_16samples_metadata["sample"] <- sample
# Add a day of experiment column
day_of_experiment <- c(15,15,1,1,24,24,32,32,36,36,42,42,48,48,8,8)
kosmos_12S_16samples_metadata["day_of_experiment"] <- day_of_experiment
# Add a column for group (M1 after OMZ water addition, or Pacific + first two M1 samples)
kosmos_12S_16samples_metadata %>% mutate(.,group = ifelse(sample %in% c("M15","M24","M32","M36","M42","M48"),"M1_postOMZ","P_all_M1_preOMZ")) -> kosmos_12S_16samples_metadata
# kosmos_12S_16samples_metadata %>% mutate(.,SAMPLING_day = regmatches(kosmos_12S_16samples_metadata$sample, gregexpr("[[:digit:]]+", kosmos_12S_16samples_metadata$sample))) -> kosmos_12S_16samples_metadata
# metadata_12S <- metadata_12S %>%
# dplyr::select("X.SampleID", everything())
# metadata_12S %>% dplyr::rename("#SampleID" = X.SampleID) -> metadata_12S
write.table(kosmos_12S_16samples_metadata,here::here("local_deicode_runs", "12S", "kosmos_12S_16_sample_data_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
pco <- read_qza(here::here("local_deicode_runs", "12S", "ordination.qza"))
label.PC1 <- paste0("PC1 (", round(pco$data$ProportionExplained$PC1, 3)*100,"%)")
label.PC1
## [1] "PC1 (59.2%)"
label.PC2 <- paste0("PC2 (", round(pco$data$ProportionExplained$PC2, 3)*100,"%)")
label.PC2
## [1] "PC2 (31.6%)"
label.PC3 <- paste0("PC3 (", round(pco$data$ProportionExplained$PC3, 3)*100,"%)")
label.PC3
## [1] "PC3 (9.3%)"
## Prepare PCA data for ggplot
pca_metadata <- kosmos_12S_16samples_metadata
pca_metadata %>% dplyr::rename(., "SampleID" = "#SampleID") -> pca_metadata
#pca_metadata$depth_cat <- factor(pca_metadata$depth_cat,levels = c("0-5","6-20","20-29","30-39","40-59","60-80","100-199","200-250","280-500","600-700"))
pca_data <- pco$data$Vectors
#pca_data <- left_join(pca_data,pca_metadata,on = "SampleID")
pca_data <- right_join(pca_data,pca_metadata,on = "SampleID")
## Joining, by = "SampleID"
pca_data <- subset(pca_data, !(is.na(pca_data$PC1)))
## Plot PCA in ggplot
pca_gg_sample <- ggplot(pca_data,aes(x=PC1,y=PC2,color = SAMPLING_station_number,shape = SAMPLING_station_number, label = day_of_experiment))+
geom_point(size = 6)+
# Subset some samples to be above right
geom_text(data = subset(pca_data,sample %in% c("M48","P24", "M8", "P32","P8","P42")),aes(x=PC1+0.007,y=PC2+0.05,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be below left
geom_text(data = subset(pca_data,sample %in% c("P48")),aes(x=PC1-0.007,y=PC2-0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# Subset some samples to be below right
geom_text(data = subset(pca_data,sample %in% c("M1","P15","P36","M36","M32","M15")),aes(x=PC1+0.009,y=PC2-0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be above left
geom_text(data = subset(pca_data,sample %in% c("M42","M24","P1")),aes(x=PC1-0.007,y=PC2+0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# geom_text(hjust = 1, vjust=-0.5, segment.alpha = 0, show_guide = FALSE)+
xlab(print(label.PC1))+
ylab(print(label.PC2))+
scale_color_manual(name = "Station", values = c("#e41a1c","#377eb8"), labels = c("Mesocosm 1", "Pacific"))+
scale_shape_manual(name = "Station", values = c(19,17), labels = c("Mesocosm 1", "Pacific"))+
annotate("text", x=Inf, y = Inf, label = "(b)", vjust=1.5, hjust=1.3,size = 12)+
theme(panel.background = element_rect(fill = "white",color = "black",size = 1),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.title = element_text(size = 20,face = "bold"),
axis.text = element_blank(),
axis.ticks.length=unit(0, "cm"),
plot.margin = margin(0.25, 0, 0.25, 0.25, "cm"),
plot.title = element_text(size = 50, face = "bold", hjust = 0.5),
legend.position = "none")+
ggtitle("12S")+
xlim(NA,max(pca_data$PC1)+0.03)
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## [1] "PC1 (59.2%)"
## [1] "PC2 (31.6%)"
pca_gg_sample
ggsave(here::here("figures", "PCA_figure", "12S_pca_kosmos_3PCs_16samples_9.25.20.png"),pca_gg_sample,width = 7,height = 5)
kosmos_12S_chordata_16_phyloseq <- subset_taxa(kosmos_12S_16_phyloseq, Phylum == "Chordata")
kosmos_12S_chordata_16_asv_table <- as.data.frame(otu_table(kosmos_12S_chordata_16_phyloseq))
kosmos_12S_chordata_16_asv_table <- rownames_to_column(kosmos_12S_chordata_16_asv_table,"#OTUID")
write.table(kosmos_12S_chordata_16_asv_table,here::here("local_deicode_runs", "12S", "chordata", "kosmos_12S_chordata_16_asv_table_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_12S_chordata_16_tax_table <- as.data.frame(as(tax_table(kosmos_12S_chordata_16_phyloseq),"matrix"))
kosmos_12S_chordata_16_tax_table <- rownames_to_column(kosmos_12S_chordata_16_tax_table,"#OTUID")
write.table(kosmos_12S_chordata_16_tax_table,here::here("local_deicode_runs", "12S", "chordata", "kosmos_12S_chordata_16_tax_table_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_12S_chordata_16samples_metadata <- as.data.frame(as.matrix(sample_data(kosmos_12S_chordata_16_phyloseq)))
kosmos_12S_chordata_16samples_metadata <- rownames_to_column(kosmos_12S_chordata_16samples_metadata,"#SampleID")
kosmos_12S_chordata_16samples_metadata <- kosmos_12S_chordata_16samples_metadata[,!(names(kosmos_12S_chordata_16samples_metadata) %in% c("sample_name"))]
# Add a sample name column
sample <- c("M1","P1","M15","P15","M24","P24","M32","P32","M36","P36","M42","P42","M48","P48","M8","P8")
kosmos_12S_chordata_16samples_metadata["sample"] <- sample
# Add a day of experiment column
day_of_experiment <- c(1,1,15,15,24,24,32,32,36,36,42,42,48,48,8,8)
kosmos_12S_chordata_16samples_metadata["day_of_experiment"] <- day_of_experiment
# Add a column for group (M1 after OMZ water addition, or Pacific + first two M1 samples)
kosmos_12S_chordata_16samples_metadata %>% mutate(.,group = ifelse(sample %in% c("M15","M24","M32","M36","M42","M48"),"M1_postOMZ","P_all_M1_preOMZ")) -> kosmos_12S_chordata_16samples_metadata
# kosmos_12S_chordata_16samples_metadata %>% mutate(.,SAMPLING_day = regmatches(kosmos_12S_chordata_16samples_metadata$sample, gregexpr("[[:digit:]]+", kosmos_12S_chordata_16samples_metadata$sample))) -> kosmos_12S_chordata_16samples_metadata
# metadata_12S <- metadata_12S %>%
# dplyr::select("X.SampleID", everything())
# metadata_12S %>% dplyr::rename("#SampleID" = X.SampleID) -> metadata_12S
write.table(kosmos_12S_chordata_16samples_metadata,here::here("local_deicode_runs", "12S", "chordata", "kosmos_12S_chordata_16_sample_data_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
pco <- read_qza(here::here("local_deicode_runs", "12S", "chordata", "ordination.qza"))
label.PC1 <- paste0("PC1 (", round(pco$data$ProportionExplained$PC1, 3)*100,"%)")
label.PC1
## [1] "PC1 (57.5%)"
label.PC2 <- paste0("PC2 (", round(pco$data$ProportionExplained$PC2, 3)*100,"%)")
label.PC2
## [1] "PC2 (39.4%)"
label.PC3 <- paste0("PC3 (", round(pco$data$ProportionExplained$PC3, 3)*100,"%)")
label.PC3
## [1] "PC3 (3.2%)"
## Prepare PCA data for ggplot
pca_metadata <- kosmos_12S_16samples_metadata
pca_metadata %>% dplyr::rename(., "SampleID" = "#SampleID") -> pca_metadata
#pca_metadata$depth_cat <- factor(pca_metadata$depth_cat,levels = c("0-5","6-20","20-29","30-39","40-59","60-80","100-199","200-250","280-500","600-700"))
pca_data <- pco$data$Vectors
#pca_data <- left_join(pca_data,pca_metadata,on = "SampleID")
pca_data <- right_join(pca_data,pca_metadata,on = "SampleID")
## Joining, by = "SampleID"
pca_data <- subset(pca_data, !(is.na(pca_data$PC1)))
## Plot PCA in ggplot
pca_gg_sample <- ggplot(pca_data,aes(x=PC1,y=PC2,color = SAMPLING_station_number,shape = SAMPLING_station_number, label = day_of_experiment))+
geom_point(size = 6)+
# Subset some samples to be above right
geom_text(data = subset(pca_data,sample %in% c("M48","P24", "M8", "P8","P42")),aes(x=PC1+0.007,y=PC2+0.05,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be below left
geom_text(data = subset(pca_data,sample %in% c("P1","P48")),aes(x=PC1-0.007,y=PC2-0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# Subset some samples to be below right
geom_text(data = subset(pca_data,sample %in% c("M1","P36","M36","M32","M15")),aes(x=PC1+0.009,y=PC2-0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be above left
geom_text(data = subset(pca_data,sample %in% c("M42","M24","P32","P15")),aes(x=PC1-0.007,y=PC2+0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# geom_text(hjust = 1, vjust=-0.5, segment.alpha = 0, show_guide = FALSE)+
xlab(print(label.PC1))+
ylab(print(label.PC2))+
scale_color_manual(name = "Station", values = c("#e41a1c","#377eb8"), labels = c("Mesocosm 1", "Pacific"))+
scale_shape_manual(name = "Station", values = c(19,17), labels = c("Mesocosm 1", "Pacific"))+
# annotate("text", x=Inf, y = Inf, label = "(b)", vjust=1.5, hjust=1.3,size = 12)+
theme(panel.background = element_rect(fill = "white",color = "black",size = 1),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.title = element_text(size = 20,face = "bold"),
axis.text = element_blank(),
axis.ticks.length=unit(0, "cm"),
plot.margin = margin(0.25, 0, 0.25, 0.25, "cm"),
legend.position = "none",
plot.title = element_text(size = 50, face = "bold", hjust = 0.5))+
xlim(min(pca_data$PC1)-0.03,max(pca_data$PC1)+0.03)+
ggtitle("12S")
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## [1] "PC1 (57.5%)"
## [1] "PC2 (39.4%)"
pca_gg_sample
ggsave(here::here("figures", "PCA_figure", "12S_pca_kosmos_chordata_3PCs_16samples_9.25.20.png"),pca_gg_sample,width = 7,height = 6)
chloroplast_16_asv_table <- as.data.frame(otu_table(chloroplast_16_phyloseq))
chloroplast_16_asv_table <- rownames_to_column(chloroplast_16_asv_table,"#OTUID")
write.table(chloroplast_16_asv_table,here::here("local_deicode_runs", "16S_plastidial", "kosmos_chloroplast_asv_table_16samples_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
chloroplast_16_tax_table<- as.data.frame(as(tax_table(chloroplast_16_phyloseq),"matrix"))
chloroplast_16_tax_table <- rownames_to_column(chloroplast_16_tax_table,"#OTUID")
write.table(chloroplast_16_tax_table,here::here("local_deicode_runs", "16S_plastidial", "kosmos_chloroplast_tax_table_16samples_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_16S_16_metadata <- as.data.frame(as.matrix(sample_data(chloroplast_16_phyloseq)))
kosmos_16S_16_metadata <- rownames_to_column(kosmos_16S_16_metadata,"#SampleID")
# Add a column for group (M1 after OMZ water addition, or Pacific + first two M1 samples)
kosmos_16S_16_metadata$sample <- as.character(kosmos_16S_16_metadata$sample)
kosmos_16S_16_metadata %>% mutate(.,group = ifelse(sample %in% c("M15","M24","M32","M36","M42","M48"),"M1_postOMZ","P_all_M1_preOMZ")) -> kosmos_16S_16_metadata
# metadata_12S <- metadata_12S %>%
# dplyr::select("X.SampleID", everything())
# metadata_12S %>% dplyr::rename("#SampleID" = X.SampleID) -> metadata_12S
write.table(kosmos_16S_16_metadata,here::here("local_deicode_runs", "16S_plastidial", "KOSMOS_sample_data_16S_16samples_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
pco<-read_qza(here::here("local_deicode_runs", "16S_plastidial", "ordination.qza"))
label.PC1 <- paste0("PC1 (", round(pco$data$ProportionExplained$PC1, 3)*100,"%)")
label.PC1
## [1] "PC1 (55.2%)"
label.PC2 <- paste0("PC2 (", round(pco$data$ProportionExplained$PC2, 3)*100,"%)")
label.PC2
## [1] "PC2 (37.4%)"
label.PC3 <- paste0("PC3 (", round(pco$data$ProportionExplained$PC3, 3)*100,"%)")
label.PC3
## [1] "PC3 (7.3%)"
## Prepare PCA data for ggplot
pca_metadata <- kosmos_16S_16_metadata
pca_metadata %>% dplyr::rename(., "SampleID" = "#SampleID") -> pca_metadata
#pca_metadata$depth_cat <- factor(pca_metadata$depth_cat,levels = c("0-5","6-20","20-29","30-39","40-59","60-80","100-199","200-250","280-500","600-700"))
pca_data <- pco$data$Vectors
#pca_data <- left_join(pca_data,pca_metadata,on = "SampleID")
pca_data <- right_join(pca_data,pca_metadata,on = "SampleID")
## Joining, by = "SampleID"
pca_data <- subset(pca_data, !(is.na(pca_data$PC1)))
## Plot PCA in ggplot
pca_gg_sample <- ggplot(pca_data,aes(x=PC1,y=PC2,color = SAMPLING_station_number,shape = SAMPLING_station_number, label = SAMPLING_day))+
geom_point(size = 6)+
# Subset some samples to be above right
geom_text(data = subset(pca_data,sample %in% c("P42","P32","P15","P8","P1","M24","M36")),aes(x=PC1+0.007,y=PC2+0.04,color = SAMPLING_station_number,label = SAMPLING_day),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be below left
geom_text(data = subset(pca_data,sample %in% c("M1","M48","M42","M32")),aes(x=PC1-0.007,y=PC2-0.04,color = SAMPLING_station_number,label = SAMPLING_day),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# Subset some samples to be below right
geom_text(data = subset(pca_data,sample %in% c("M8","P48")),aes(x=PC1+0.007,y=PC2-0.04,color = SAMPLING_station_number,label = SAMPLING_day),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be above left
geom_text(data = subset(pca_data,sample %in% c("P24","M15","P36")),aes(x=PC1-0.007,y=PC2+0.04,color = SAMPLING_station_number,label = SAMPLING_day),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# geom_text(hjust = 1, vjust=-0.5, segment.alpha = 0, show_guide = FALSE)+
xlab(print(label.PC1))+
ylab(print(label.PC2))+
scale_color_manual(name = "Station", values = c("#e41a1c","#377eb8"), labels = c("Mesocosm 1", "Pacific"))+
scale_shape_manual(name = "Station", values = c(19,17), labels = c("Mesocosm 1", "Pacific"))+
annotate("text", x=Inf, y = Inf, label = "(c)", vjust=1.5, hjust=1.3,size = 12)+
theme(panel.background = element_rect(fill = "white",color = "black",size = 1),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.title = element_text(size = 20,face = "bold"),
axis.text = element_blank(),
axis.ticks.length=unit(0, "cm"),
plot.margin = margin(0.25, 0, 0.25, 0.25, "cm"),
legend.position = "none",
plot.title = element_text(size = 50, face = "bold", hjust = 0.5))+
xlim(min(pca_data$PC1)-0.03,max(pca_data$PC1)+0.03)+
ggtitle("16S Plastidial")
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## [1] "PC1 (55.2%)"
## [1] "PC2 (37.4%)"
pca_gg_sample
ggsave(here::here("figures", "PCA_figure", "16S_plastidial_pca_kosmos_3PCs_16samples_9.25.20.png"),pca_gg_sample,width = 7,height = 6)
##### PERMANOVA #####
# Extract metadata
pca_metadata <- pca_data
# pca_metadata %>% dplyr::rename(., "SampleID" = "#SampleID") -> pca_metadata
# Create a new column to compare M1 post OMZ water addition with other samples
pca_metadata %>%
mutate(permanova_group = ifelse(SAMPLING_station == "Pacific" | SAMPLING_station == "Mesocosm 1" & sample == "M1" | SAMPLING_station == "Mesocosm 1" & sample == "M8",
"Pacific & M1 pre-OMZ", "M1 post-OMZ")) -> pca_metadata
## Load COI Data
distance <- read_qza(here::here("local_deicode_runs", "16S_plastidial", "distance.qza"))
# Extract distance matrix
distance_matrix <- distance$data
# convert DEICODE matrix to "dist" class object
PCA_dist <- as.dist(distance_matrix)
##----Run individual PERMANOVAs-------------------------------------------------------------
# PERMANOVA for CTD vs. ESP
set.seed(123)
M1_v_Pacific_permanova_16S_plastidial <- adonis2(PCA_dist ~ permanova_group, data = pca_metadata, permutations=999)
M1_v_Pacific_permanova_16S_plastidial
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = PCA_dist ~ permanova_group, data = pca_metadata, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## permanova_group 1 11.506 0.33991 7.2092 0.002 **
## Residual 14 22.345 0.66009
## Total 15 33.851 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
bacteria_16_asv_table <- as.data.frame(otu_table(bacteria_16_phyloseq))
bacteria_16_asv_table <- rownames_to_column(bacteria_16_asv_table,"#OTUID")
write.table(bacteria_16_asv_table,here::here("local_deicode_runs", "16S_bacterial", "kosmos_bacteria_asv_table_16samples_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
bacteria_16_tax_table<- as.data.frame(as(tax_table(bacteria_16_phyloseq),"matrix"))
bacteria_16_tax_table <- rownames_to_column(bacteria_16_tax_table,"#OTUID")
write.table(bacteria_16_tax_table,here::here("local_deicode_runs", "16S_bacterial", "kosmos_bacteria_tax_table_16samples_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_16S_16_metadata <- as.data.frame(as.matrix(sample_data(bacteria_16_phyloseq)))
kosmos_16S_16_metadata <- rownames_to_column(kosmos_16S_16_metadata,"#SampleID")
# Add a column for group (M1 after OMZ water addition, or Pacific + first two M1 samples)
kosmos_16S_16_metadata$sample <- as.character(kosmos_16S_16_metadata$sample)
kosmos_16S_16_metadata %>% mutate(.,group = ifelse(sample %in% c("M15","M24","M32","M36","M42","M48"),"M1_postOMZ","P_all_M1_preOMZ")) -> kosmos_16S_16_metadata
# metadata_12S <- metadata_12S %>%
# dplyr::select("X.SampleID", everything())
# metadata_12S %>% dplyr::rename("#SampleID" = X.SampleID) -> metadata_12S
write.table(kosmos_16S_16_metadata,here::here("local_deicode_runs", "16S_bacterial", "KOSMOS_sample_data_bacteria_16samples_for_biom.txt"),sep = "\t",row.names = FALSE,quote = FALSE)
pco<-read_qza(here::here("local_deicode_runs", "16S_bacterial", "ordination.qza"))
label.PC1 <- paste0("PC1 (", round(pco$data$ProportionExplained$PC1, 3)*100,"%)")
label.PC1
## [1] "PC1 (62%)"
label.PC2 <- paste0("PC2 (", round(pco$data$ProportionExplained$PC2, 3)*100,"%)")
label.PC2
## [1] "PC2 (37.2%)"
label.PC3 <- paste0("PC3 (", round(pco$data$ProportionExplained$PC3, 3)*100,"%)")
label.PC3
## [1] "PC3 (0.9%)"
## Prepare PCA data for ggplot
pca_metadata <- kosmos_16S_metadata
pca_metadata %>% rownames_to_column(.,"#SampleID") -> pca_metadata
pca_metadata %>% dplyr::rename(., "SampleID" = "#SampleID") -> pca_metadata
#pca_metadata$depth_cat <- factor(pca_metadata$depth_cat,levels = c("0-5","6-20","20-29","30-39","40-59","60-80","100-199","200-250","280-500","600-700"))
pca_data <- pco$data$Vectors
#pca_data <- left_join(pca_data,pca_metadata,on = "SampleID")
pca_data <- right_join(pca_data,pca_metadata,on = "SampleID")
## Joining, by = "SampleID"
pca_data <- subset(pca_data, !(is.na(pca_data$PC1)))
## Plot PCA in ggplot
pca_gg_sample <- ggplot(pca_data,aes(x=PC1,y=PC2,color = SAMPLING_station_number,shape = SAMPLING_station_number, label = SAMPLING_day))+
geom_point(size = 6)+
# Subset some samples to be above right
geom_text(data = subset(pca_data,sample %in% c("M8","P42","P15","P1","M36")),aes(x=PC1+0.007,y=PC2+0.04,color = SAMPLING_station_number,label = SAMPLING_day),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be below left
geom_text(data = subset(pca_data,sample %in% c("M1","M42","P8","M24")),aes(x=PC1-0.007,y=PC2-0.04,color = SAMPLING_station_number,label = SAMPLING_day),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# Subset some samples to be below right
geom_text(data = subset(pca_data,sample %in% c("P48")),aes(x=PC1+0.007,y=PC2-0.04,color = SAMPLING_station_number,label = SAMPLING_day),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be above left
geom_text(data = subset(pca_data,sample %in% c("P24","M15","P36","P32","M48","M32")),aes(x=PC1-0.008,y=PC2+0.05,color = SAMPLING_station_number,label = SAMPLING_day),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# geom_text(hjust = 1, vjust=-0.5, segment.alpha = 0, show_guide = FALSE)+
xlab(print(label.PC1))+
ylab(print(label.PC2))+
scale_color_manual(name = "Station", values = c("#e41a1c","#377eb8"), labels = c("Mesocosm 1", "Pacific"))+
scale_shape_manual(name = "Station", values = c(19,17), labels = c("Mesocosm 1", "Pacific"))+
annotate("text", x=Inf, y = Inf, label = "(d)", vjust=1.5, hjust=1.3,size = 12)+
theme(panel.background = element_rect(fill = "white",color = "black",size = 1),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.title = element_text(size = 20,face = "bold"),
axis.text = element_blank(),
axis.ticks.length=unit(0, "cm"),
plot.margin = margin(0.25, 0, 0.25, 0.25, "cm"),
legend.position = "none",
plot.title = element_text(size = 50, face = "bold", hjust = 0.5))+
xlim(min(pca_data$PC1)-0.03,max(pca_data$PC1)+0.03)+
ggtitle("16S Bacterial")
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## [1] "PC1 (62%)"
## [1] "PC2 (37.2%)"
pca_gg_sample
ggsave(here::here("figures", "PCA_figure", "16S_bacterial_pca_kosmos_3PCs_16samples_9.25.20.png"),pca_gg_sample,width = 7,height = 6)
##### PERMANOVA #####
# Extract metadata
pca_metadata <- pca_data
# pca_metadata %>% dplyr::rename(., "SampleID" = "#SampleID") -> pca_metadata
# Create a new column to compare M1 post OMZ water addition with other samples
pca_metadata %>%
mutate(permanova_group = ifelse(SAMPLING_station == "Pacific" | SAMPLING_station == "Mesocosm 1" & sample == "M1" | SAMPLING_station == "Mesocosm 1" & sample == "M8",
"Pacific & M1 pre-OMZ", "M1 post-OMZ")) -> pca_metadata
## Load COI Data
distance <- read_qza(here::here("local_deicode_runs", "16S_bacterial", "distance.qza"))
# Extract distance matrix
distance_matrix <- distance$data
# convert DEICODE matrix to "dist" class object
PCA_dist <- as.dist(distance_matrix)
##----Run individual PERMANOVAs-------------------------------------------------------------
# PERMANOVA for CTD vs. ESP
set.seed(123)
M1_v_Pacific_permanova_16S_bacteria <- adonis2(PCA_dist ~ permanova_group, data = pca_metadata, permutations=999)
M1_v_Pacific_permanova_16S_bacteria
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = PCA_dist ~ permanova_group, data = pca_metadata, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## permanova_group 1 13.529 0.41943 10.114 0.001 ***
## Residual 14 18.727 0.58057
## Total 15 32.256 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
group_colors = c("Rhodophytes" = "#E15759",
"Bicosoecids" = "#F28E2B",
"Dinoflagellates" = "#B6992D",
"Prymnesiales" = "#499894",
"Diatoms" = "#4E79A7",
"Chlorophyta" = "#59A14F",
"Oomycetes" = "#F1CE63",
"Coccolithophores" = "#A0CBE8",
"Amoebozoa" = "#FF9D9A",
"Cercozoa" = "#FFBE7D",
"Cryptophytes" = "#8CD17D",
"Embryophytes" = "#D7B5A6",
"Silicoflagellates" = "#86BCB6",
"Copepods" = "#BAB0AC",
"Rotifers" = "#79706E",
"Proteobacteria" = "#9D7660",
"Bacteroidetes" = "#D4A6C8",
"Actinobacteria" = "#FABFD2",
"Marinimicrobia" = "#6a3d9a",
"Verrucomicrobia" = "#D37295",
"Patescibacteria" = "#91003f",
"Cyanobacteria" = "#B07AA1",
"Spirotrichs" = "#1b7837",
"Unassigned" = "gray10")
# Make a fake dataframe with all of the different taxa that we want
allgroups <- c("Rhodophytes",
"Bicosoecids",
"Dinoflagellates",
"Prymnesiales",
"Diatoms",
"Chlorophyta",
"Oomycetes",
"Coccolithophores",
"Amoebozoa",
"Cercozoa",
"Spirotrichs",
"Cryptophytes",
"Embryophytes",
"Silicoflagellates",
"Copepods",
"Rotifers",
"Proteobacteria",
"Bacteroidetes",
"Actinobacteria",
"Verrucomicrobia",
"Cyanobacteria",
"Patescibacteria",
"Marinimicrobia",
"Unassigned")
fakedata <- rep(0.05,24)
fakesample <- rep("nosample",24)
legend_data <- data.frame(allgroups,fakedata,fakesample)
legend_data$allgroups <- factor(legend_data$allgroups, levels = c("Rhodophytes",
"Bicosoecids",
"Dinoflagellates",
"Prymnesiales",
"Diatoms",
"Chlorophyta",
"Oomycetes",
"Coccolithophores",
"Amoebozoa",
"Cercozoa",
"Spirotrichs",
"Cryptophytes",
"Embryophytes",
"Silicoflagellates",
"Copepods",
"Rotifers",
"Proteobacteria",
"Bacteroidetes",
"Actinobacteria",
"Verrucomicrobia",
"Cyanobacteria",
"Patescibacteria",
"Marinimicrobia",
"Unassigned"))
legend_gg<-ggplot(legend_data, aes(x=fakesample,y=fakedata,fill=allgroups))+
geom_bar(stat='identity')+theme_minimal()+
scale_fill_manual(values = group_colors)+
theme(axis.text.x = element_blank(),
legend.position = "right",
axis.title=element_text(size=8),
plot.margin = unit(c(0,5,0,0), "pt"),
legend.text=element_text(size=10),
legend.title = element_blank()
)+
guides(fill=guide_legend(ncol=8))+
xlab("Sample")+
ylab("Proportion of Reads")
legend_gg
ggsave(here::here("figures", "PCA_figure", "combined_figure_legend_top30_092520.png"),legend_gg,width = 21, height = 5)
# Make color scheme
group_colors = c("Rhodophytes" = "#E15759",
"Bicosoecids" = "#F28E2B",
"Dinoflagellates" = "#B6992D",
"Prymnesiales" = "#499894",
"Diatoms" = "#4E79A7",
"Chlorophyta" = "#59A14F",
"Oomycetes" = "#F1CE63",
"Coccolithophores" = "#A0CBE8",
"Amoebozoa" = "#FF9D9A",
"Cercozoa" = "#FFBE7D",
"Cryptophytes" = "#8CD17D",
"Embryophytes" = "#D7B5A6",
"Silicoflagellates" = "#86BCB6",
"Copepods" = "#BAB0AC",
"Rotifers" = "#79706E",
"Proteobacteria" = "#9D7660",
"Bacteroidetes" = "#D4A6C8",
"Actinobacteria" = "#FABFD2",
"Marinimicrobia" = "#6a3d9a",
"Verrucomicrobia" = "#D37295",
"Patescibacteria" = "#91003f",
"Cyanobacteria" = "#B07AA1",
"Spirotrichs" = "#1b7837",
"other" = "gray10")
pco <- read_qza(here::here("local_deicode_runs", "COI", "ordination.qza"))
PC_loadings_COI <- pco$data$Species
# Scaling PCA?
# max_PC1 <- max(abs(PC_loadings_COI$PC1))
# PC_loadings_COI %>% mutate(.,PC1_scaled = PC_loadings_COI$PC1/max_PC1) -> PC_loadings_COI
# kosmos_COI_16_tax_table
kosmos_COI_tax_table <- dplyr::rename(kosmos_COI_16_tax_table,FeatureID = "#OTUID")
PC_loadings_COI <- left_join(PC_loadings_COI,kosmos_COI_tax_table,by = "FeatureID")
# Change all taxonomic annotations to character
PC_loadings_COI$Kingdom <- as.character(PC_loadings_COI$Kingdom)
PC_loadings_COI$Phylum <- as.character(PC_loadings_COI$Phylum)
PC_loadings_COI$Class <- as.character(PC_loadings_COI$Class)
PC_loadings_COI$Order <- as.character(PC_loadings_COI$Order)
PC_loadings_COI$Family <- as.character(PC_loadings_COI$Family)
PC_loadings_COI$Genus <- as.character(PC_loadings_COI$Genus)
PC_loadings_COI$Species <- as.character(PC_loadings_COI$Species)
# Add taxonomy field that shows the lowest level of taxonomy
PC_loadings_COI <- PC_loadings_COI[order(-PC_loadings_COI$PC1),]
PC_loadings_COI %>% mutate(.,taxonomy = ifelse(!(Species %in% c("unassigned","no_hit","s_","unknown")),Species,
# ifelse(!(Genus %in% c("unassigned","no_hit","g_","unknown")),paste0(PC_loadings_COI$Genus, " sp."),
ifelse(!(Genus %in% c("unassigned","no_hit","g_","unknown")),paste0(PC_loadings_COI$Genus),
ifelse(!(Family %in% c("unassigned","no_hit","unknown")),paste0("Family ", PC_loadings_COI$Family),
ifelse(!(Order %in% c("unassigned","no_hit","unknown")),paste0("Order ", PC_loadings_COI$Order),
ifelse(!(Class %in% c("unassigned","no_hit","unknown")),paste0("Class ", PC_loadings_COI$Class),
ifelse(!(Phylum %in% c("unassigned","no_hit","unknown")),paste0("Phylum ", PC_loadings_COI$Phylum),
"Unassigned"))))))) -> PC_loadings_COI
# Create new "Group" field that groups taxa together.
PC_loadings_COI %>% mutate(.,group =
# COI
ifelse(Phylum %in% c("Bacillariophyta"), "Diatoms",
ifelse(Order %in% c("Isochrysidales"), "Coccolithophores",
ifelse(Class %in% c("Dinophyceae"),"Dinoflagellates",
ifelse(Phylum %in% c("Rhodophyta"),"Rhodophytes",
ifelse(Phylum %in% c("Rotifera"),"Rotifers",
ifelse(Family %in% c("Chrysochromulinaceae"),"Prymnesiales",
ifelse(Class %in% c("Oomycetes"),"Oomycetes",
ifelse(Class %in% c("Hexanauplia"),"Copepods",
ifelse(Family %in% c("Cafeteriaceae"),"Bicosoecids",
ifelse(Family %in% c("Vexilliferidae"),"Amoebozoa",
ifelse(Family %in% c("Paulinellidae"),"Cercozoa",
ifelse(Family %in% c("Pycnococcaceae"),"Chlorophyta",
# Chloroplast
ifelse(Class %in% c("Cryptophyceae"),"Cryptophytes",
ifelse(Class %in% c("Embryophyceae"), "Embryophytes",
ifelse(Class %in% c("Dictyophyceae"), "Silicoflagellates",
ifelse(Family %in% c("Chrysochromulinaceae"),"Prymnesiales",
ifelse(Class %in% c("Trebouxiophyceae"),"Chlorophyta",
"other")))))))))))))))))) -> PC_loadings_COI
#9D7660
#D4A6C8
#FABFD2
#BAB0AC
#F1CE63
unique(PC_loadings_COI$group)
## [1] "Rotifers" "Cercozoa" "Coccolithophores" "Oomycetes"
## [5] "other" "Amoebozoa" "Copepods" "Diatoms"
## [9] "Rhodophytes" "Prymnesiales" "Dinoflagellates" "Bicosoecids"
## [13] "Chlorophyta"
# Add in information from ASV_tophit_F_taxonomy table
tophit_taxonomy <- read.csv("/Users/markusmin/Documents/MBARI-2167/local/eDNA_meta_analysis_local_data/banzai_dada2/COI/X/ASV_tophit_Ftaxonomy.csv")
colnames(tophit_taxonomy)[1] <- "FeatureID"
colnames(tophit_taxonomy)[6] <- "precent_ID"
tophit_taxonomy <- tophit_taxonomy[,c(1:6,9)]
PC_loadings_COI <- left_join(PC_loadings_COI,tophit_taxonomy,by = "FeatureID")
#write.csv(PC_loadings_COI,"/Users/mmin/Documents/KOSMOS_data_local/PC_loadings_analysis/PC_loadings_COI.csv")
### Normalize each sample to 1, have the proportion of reads be for each sample
kosmos_COI_asv_table <- as.data.frame(otu_table(kosmos_COI_16_phyloseq))
kosmos_COI_asv_table_matrix <- as.matrix(kosmos_COI_asv_table)
kosmos_COI_sample_sums <- as.vector(colSums(kosmos_COI_asv_table))
t(t(kosmos_COI_asv_table_matrix)/kosmos_COI_sample_sums) -> kosmos_COI_normalized_asv_table
kosmos_COI_normalized_asv_table <- as.data.frame(kosmos_COI_normalized_asv_table)
# Subset the ASV table based on a relative abundance threshold in multiple samples
# The problem with this approach is that it can eliminate taxa that don't have a lot of reads because of amplification bias, like Akashiwo
## Set threshold to at least 0.001 in at least 4 samples -> gives us 88 ASVs. Totally manageable. This is loose enough to keep Akashiwo in the picture.
# Add a column to the normalized asv table to store number of values over our cutoff (in this case 0.001)
kosmos_COI_normalized_asv_table %>% rownames_to_column(.,"ASV") -> kosmos_COI_normalized_asv_table
kosmos_COI_normalized_asv_table %>% mutate(.,novercutoff = 0) -> kosmos_COI_normalized_asv_table
kosmos_COI_normalized_asv_table %>% column_to_rownames(.,"ASV") -> kosmos_COI_normalized_asv_table
for (i in seq(1:nrow(kosmos_COI_normalized_asv_table))){
kosmos_COI_normalized_asv_table$novercutoff[i] = sum(kosmos_COI_normalized_asv_table[i,] > 0.001)
}
kosmos_COI_normalized_asv_table_subset <- subset(kosmos_COI_normalized_asv_table, novercutoff >= 4)
kosmos_COI_normalized_asv_table_subset <- kosmos_COI_normalized_asv_table_subset[!(names(kosmos_COI_normalized_asv_table_subset) %in% c("novercutoff"))]
# Save ASVs to keep as a list:
abundant_ASVs <- rownames(kosmos_COI_normalized_asv_table_subset)
PC_loadings_COI_subset <- subset(PC_loadings_COI, FeatureID %in% abundant_ASVs)
kosmos_COI_asv_table_for_supp <- rownames_to_column(kosmos_COI_asv_table, "FeatureID")
supp_table <- left_join(PC_loadings_COI_subset,kosmos_COI_asv_table_for_supp, by = "FeatureID")
# write.csv(supp_table,"/Users/mmin/Documents/KOSMOS_data_local/PC_loadings_analysis/temp/supplementary_table_1.csv")
# Plot all ASVs that are abundant (for supplemental figures)
COI_PCA_gg <- ggplot(PC_loadings_COI_subset,aes(x = reorder(FeatureID,PC1),y = PC1, fill = Class))+
geom_bar(stat = "identity")+
ylim(-0.25,0.25)+
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.length.x=unit(0, "cm"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
# legend.position = "none",
axis.text.y = element_text(size = 15),
axis.ticks.length.y = unit(0.25,"cm"),
axis.title.y = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank())+
annotate("segment", x = 15.5,xend = 15.5, y = 0, yend = -0.2, color = "black", size = 1, lty = 3)+
annotate("segment", x = 0,xend = 15.5, y = -0.2, yend = -0.2, color = "black", size = 1, lty = 3)+
annotate("segment", x = 0,xend = 15.5, y = 0, yend = 0, color = "black", size = 1, lty = 3)+
annotate("text", x = 8, y = -0.175,label = "Top 15 ASVs\n(negative)",size = 4.5,fontface = "bold")+
annotate("segment", x = 72.5,xend = 72.5, y = 0, yend = 0.2, color = "black", size = 1, lty = 3)+
annotate("segment", x = 72.5,xend = 88, y = 0.2, yend = 0.2, color = "black", size = 1, lty = 3)+
annotate("segment", x = 72.5,xend = 88, y = 0, yend = 0, color = "black", size = 1, lty = 3)+
annotate("text", x = 80, y = 0.175,label = "Top 15 ASVs\n(positive)",size = 4.5,fontface = "bold")+
# annotate("text", x=Inf, y = Inf, label = "(a)", vjust=1.5, hjust=1.25,size = 20)+
scale_fill_tableau(palette = "Tableau 20")
COI_PCA_gg
# ggsave("/Users/mmin/Documents/KOSMOS_data_local/PC_loadings_analysis/temp/COI_PC1_unmerged_all_052220.png",COI_PCA_gg,width = 10, height = 7.5)
# Plot all taxa, top 15 on either side
PC_loadings_COI_30 <- PC_loadings_COI_subset[c(1:15, (nrow(PC_loadings_COI_subset)-14):nrow(PC_loadings_COI_subset)),]
COI_PCA_gg <- ggplot(PC_loadings_COI_30,aes(x = reorder(FeatureID,PC1),y = PC1, fill = group))+
geom_bar(stat = "identity")+
coord_flip()+
geom_text(data = PC_loadings_COI_30,aes(x = reorder(FeatureID,PC1),y = ifelse(PC1 > 0, -0.01,0.01), label = taxonomy, hjust = ifelse(PC1 > 0, 1,0),color = ifelse(taxonomy =="Unassigned","gray60","black"),fontface = ifelse(!(Genus %in% c("g_","unassigned","no_hit")),"bold.italic","bold")), size = 5)+
ylim(-0.21,0.21)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.ticks.length.y=unit(0, "cm"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
legend.position = "none",
axis.text.x = element_text(size = 15),
axis.ticks.length.x = unit(0.25,"cm"),
axis.title.x = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
plot.margin = margin(0, 0, 0, 0.95, "cm"),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank())+
scale_fill_manual(values = group_colors)+
scale_color_manual(values = c("black","gray60"))+
annotate("text", x=Inf, y = Inf, label = "(e)", vjust=1.5, hjust=1.25,size = 12)
COI_PCA_gg
ggsave(here::here("figures", "PCA_figure", "COI_PC1_unmerged_top30_072720.png"),COI_PCA_gg,width = 7, height = 8.5)
pco <- read_qza(here::here("local_deicode_runs", "18S", "ordination.qza"))
PC_loadings_18S <- pco$data$Species
# Scaling PCA?
# max_PC1 <- max(abs(PC_loadings_18S$PC1))
# PC_loadings_18S %>% mutate(.,PC1_scaled = PC_loadings_18S$PC1/max_PC1) -> PC_loadings_18S
# kosmos_18S_16_tax_table
kosmos_18S_tax_table <- dplyr::rename(kosmos_18S_16_tax_table,FeatureID = "#OTUID")
PC_loadings_18S <- left_join(PC_loadings_18S,kosmos_18S_tax_table,by = "FeatureID")
# Change all taxonomic annotations to character
PC_loadings_18S$Kingdom <- as.character(PC_loadings_18S$Kingdom)
PC_loadings_18S$Phylum <- as.character(PC_loadings_18S$Phylum)
PC_loadings_18S$Class <- as.character(PC_loadings_18S$Class)
PC_loadings_18S$Order <- as.character(PC_loadings_18S$Order)
PC_loadings_18S$Family <- as.character(PC_loadings_18S$Family)
PC_loadings_18S$Genus <- as.character(PC_loadings_18S$Genus)
PC_loadings_18S$Species <- as.character(PC_loadings_18S$Species)
# Add taxonomy field that shows the lowest level of taxonomy
PC_loadings_18S <- PC_loadings_18S[order(-PC_loadings_18S$PC1),]
PC_loadings_18S %>% mutate(.,taxonomy = ifelse(!(Species %in% c("unassigned","no_hit","s_","unknown","uncultured marine picoeukaryote")),Species,
# ifelse(!(Genus %in% c("unassigned","no_hit","g_","unknown")),paste0(PC_loadings_18S$Genus, " sp."),
ifelse(!(Genus %in% c("unassigned","no_hit","g_","unknown")),paste0(PC_loadings_18S$Genus),
ifelse(!(Family %in% c("unassigned","no_hit","unknown")),paste0("Family ", PC_loadings_18S$Family),
ifelse(!(Order %in% c("unassigned","no_hit","unknown")),paste0("Order ", PC_loadings_18S$Order),
ifelse(!(Class %in% c("unassigned","no_hit","unknown")),paste0("Class ", PC_loadings_18S$Class),
ifelse(!(Phylum %in% c("unassigned","no_hit","unknown")),paste0("Phylum ", PC_loadings_18S$Phylum),
"Unassigned"))))))) -> PC_loadings_18S
# Create new "Group" field that groups taxa together.
PC_loadings_18S %>% mutate(.,group =
# 18S & COI
ifelse(Phylum %in% c("Bacillariophyta"), "Diatoms",
ifelse(Order %in% c("Isochrysidales"), "Coccolithophores",
ifelse(Class %in% c("Dinophyceae"),"Dinoflagellates",
ifelse(Phylum %in% c("Rhodophyta"),"Rhodophytes",
ifelse(Phylum %in% c("Rotifera"),"Rotifers",
ifelse(Family %in% c("Chrysochromulinaceae"),"Prymnesiales",
ifelse(Class %in% c("Oomycetes"),"Oomycetes",
ifelse(Class %in% c("Hexanauplia"),"Copepods",
ifelse(Family %in% c("Cafeteriaceae"),"Bicosoecids",
ifelse(Family %in% c("Vexilliferidae"),"Amoebozoa",
ifelse(Family %in% c("Paulinellidae") | Genus == "Protaspis","Cercozoa",
ifelse(Family %in% c("Pycnococcaceae"),"Chlorophyta",
ifelse(Class == "Spirotrichea", "Spirotrichs",
# Chloroplast
ifelse(Class %in% c("Cryptophyceae"),"Cryptophytes",
ifelse(Class %in% c("Embryophyceae"), "Embryophytes",
ifelse(Class %in% c("Dictyophyceae"), "Silicoflagellates",
ifelse(Family %in% c("Chrysochromulinaceae"),"Prymnesiales",
ifelse(Class %in% c("Trebouxiophyceae"),"Chlorophyta",
"other"))))))))))))))))))) -> PC_loadings_18S
#9D7660
#D4A6C8
#FABFD2
#BAB0AC
#F1CE63
# unique(PC_loadings_18S$group)
# Add in information from ASV_tophit_F_taxonomy table
tophit_taxonomy <- read.csv("/Users/markusmin/Documents/MBARI-2167/local/eDNA_meta_analysis_local_data/banzai_dada2/18S/UU/ASV_tophit_Ftaxonomy.csv")
colnames(tophit_taxonomy)[1] <- "FeatureID"
colnames(tophit_taxonomy)[6] <- "precent_ID"
tophit_taxonomy <- tophit_taxonomy[,c(1:6,9)]
PC_loadings_18S <- left_join(PC_loadings_18S,tophit_taxonomy,by = "FeatureID")
#write.csv(PC_loadings_18S,"/Users/mmin/Documents/KOSMOS_data_local/PC_loadings_analysis/PC_loadings_18S.csv")
### Normalize each sample to 1, have the proportion of reads be for each sample
kosmos_18S_asv_table <- as.data.frame(otu_table(kosmos_18S_16_phyloseq))
kosmos_18S_asv_table_matrix <- as.matrix(kosmos_18S_asv_table)
kosmos_18S_sample_sums <- as.vector(colSums(kosmos_18S_asv_table))
t(t(kosmos_18S_asv_table_matrix)/kosmos_18S_sample_sums) -> kosmos_18S_normalized_asv_table
kosmos_18S_normalized_asv_table <- as.data.frame(kosmos_18S_normalized_asv_table)
# Subset the ASV table based on a relative abundance threshold in multiple samples
# The problem with this approach is that it can eliminate taxa that don't have a lot of reads because of amplification bias, like Akashiwo
## Set threshold to at least 0.001 in at least 4 samples -> gives us 88 ASVs. Totally manageable. This is loose enough to keep Akashiwo in the picture.
# Add a column to the normalized asv table to store number of values over our cutoff (in this case 0.001)
kosmos_18S_normalized_asv_table %>% rownames_to_column(.,"ASV") -> kosmos_18S_normalized_asv_table
kosmos_18S_normalized_asv_table %>% mutate(.,novercutoff = 0) -> kosmos_18S_normalized_asv_table
kosmos_18S_normalized_asv_table %>% column_to_rownames(.,"ASV") -> kosmos_18S_normalized_asv_table
for (i in seq(1:nrow(kosmos_18S_normalized_asv_table))){
kosmos_18S_normalized_asv_table$novercutoff[i] = sum(kosmos_18S_normalized_asv_table[i,] > 0.001)
}
kosmos_18S_normalized_asv_table_subset <- subset(kosmos_18S_normalized_asv_table, novercutoff >= 4)
kosmos_18S_normalized_asv_table_subset <- kosmos_18S_normalized_asv_table_subset[!(names(kosmos_18S_normalized_asv_table_subset) %in% c("novercutoff"))]
# Save ASVs to keep as a list:
abundant_ASVs <- rownames(kosmos_18S_normalized_asv_table_subset)
PC_loadings_18S_subset <- subset(PC_loadings_18S, FeatureID %in% abundant_ASVs)
kosmos_18S_asv_table_for_supp <- rownames_to_column(kosmos_18S_asv_table, "FeatureID")
supp_table <- left_join(PC_loadings_18S_subset,kosmos_18S_asv_table_for_supp, by = "FeatureID")
# write.csv(supp_table,"/Users/mmin/Documents/KOSMOS_data_local/PC_loadings_analysis/temp/supplementary_table_1.csv")
# Plot all ASVs that are abundant (for supplemental figures)
PCA_gg_18S <- ggplot(PC_loadings_18S_subset,aes(x = reorder(FeatureID,PC1),y = PC1, fill = Class))+
geom_bar(stat = "identity")+
ylim(-0.25,0.25)+
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.length.x=unit(0, "cm"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
# legend.position = "none",
axis.text.y = element_text(size = 15),
axis.ticks.length.y = unit(0.25,"cm"),
axis.title.y = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank())+
annotate("segment", x = 15.5,xend = 15.5, y = 0, yend = -0.2, color = "black", size = 1, lty = 3)+
annotate("segment", x = 0,xend = 15.5, y = -0.2, yend = -0.2, color = "black", size = 1, lty = 3)+
annotate("segment", x = 0,xend = 15.5, y = 0, yend = 0, color = "black", size = 1, lty = 3)+
annotate("text", x = 8, y = -0.175,label = "Top 15 ASVs\n(negative)",size = 4.5,fontface = "bold")+
annotate("segment", x = 72.5,xend = 72.5, y = 0, yend = 0.2, color = "black", size = 1, lty = 3)+
annotate("segment", x = 72.5,xend = 88, y = 0.2, yend = 0.2, color = "black", size = 1, lty = 3)+
annotate("segment", x = 72.5,xend = 88, y = 0, yend = 0, color = "black", size = 1, lty = 3)+
annotate("text", x = 80, y = 0.175,label = "Top 15 ASVs\n(positive)",size = 4.5,fontface = "bold")+
# annotate("text", x=Inf, y = Inf, label = "(a)", vjust=1.5, hjust=1.25,size = 20)+
scale_fill_tableau(palette = "Tableau 20")
PCA_gg_18S
# ggsave("/Users/mmin/Documents/KOSMOS_data_local/PC_loadings_analysis/temp/18S_PC1_unmerged_all_052220.png",PCA_gg_18S,width = 10, height = 7.5)
# Plot all taxa, top 15 on either side
PC_loadings_18S_30 <- PC_loadings_18S_subset[c(1:15, (nrow(PC_loadings_18S_subset)-14):nrow(PC_loadings_18S_subset)),]
PCA_gg_18S <- ggplot(PC_loadings_18S_30,aes(x = reorder(FeatureID,PC1),y = PC1, fill = group))+
geom_bar(stat = "identity")+
coord_flip()+
geom_text(data = PC_loadings_18S_30,aes(x = reorder(FeatureID,PC1),y = ifelse(PC1 > 0, -0.01,0.01), label = taxonomy, hjust = ifelse(PC1 > 0, 1,0),color = ifelse(taxonomy =="Unassigned","gray60","black"),fontface = ifelse(!(Genus %in% c("g_","unassigned","no_hit")),"bold.italic","bold")), size = 5)+
ylim(-0.21,0.21)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.ticks.length.y=unit(0, "cm"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
legend.position = "none",
axis.text.x = element_text(size = 15),
axis.ticks.length.x = unit(0.25,"cm"),
axis.title.x = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
plot.margin = margin(0, 0, 0, 0.95, "cm"),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank())+
scale_fill_manual(values = group_colors)+
scale_color_manual(values = c("black","gray60"))+
annotate("text", x=Inf, y = Inf, label = "(f)", vjust=1.5, hjust=1.25,size = 12)
PCA_gg_18S
ggsave(here::here("figures", "PCA_figure", "18S_PC1_unmerged_top30_092520.png"),PCA_gg_18S,width = 7, height = 8.5)
pco<-read_qza(here::here("local_deicode_runs", "16S_plastidial", "ordination.qza"))
PC_loadings_chloroplast <- pco$data$Species
kosmos_chloroplast_tax_table <- dplyr::rename(chloroplast_16_tax_table,FeatureID = "#OTUID")
PC_loadings_chloroplast <- left_join(PC_loadings_chloroplast,kosmos_chloroplast_tax_table,by = "FeatureID")
# Change all taxonomic annotations to character
PC_loadings_chloroplast$Kingdom <- as.character(PC_loadings_chloroplast$Kingdom)
PC_loadings_chloroplast$Supergroup <- as.character(PC_loadings_chloroplast$Supergroup)
PC_loadings_chloroplast$Phylum <- as.character(PC_loadings_chloroplast$Phylum)
PC_loadings_chloroplast$Class <- as.character(PC_loadings_chloroplast$Class)
PC_loadings_chloroplast$Subclass <- as.character(PC_loadings_chloroplast$Subclass)
PC_loadings_chloroplast$Order <- as.character(PC_loadings_chloroplast$Order)
PC_loadings_chloroplast$Family <- as.character(PC_loadings_chloroplast$Family)
PC_loadings_chloroplast$Genus <- as.character(PC_loadings_chloroplast$Genus)
PC_loadings_chloroplast$Species <- as.character(PC_loadings_chloroplast$Species)
# Add new taxonomy column
PC_loadings_chloroplast <- PC_loadings_chloroplast[order(-PC_loadings_chloroplast$PC1),]
PC_loadings_chloroplast %>% mutate(.,taxonomy = ifelse(!(Species %in% c("")),Species,
ifelse(!(Genus %in% c("")),paste0(PC_loadings_chloroplast$Genus),
# ifelse(!(Genus %in% c("")),paste0(PC_loadings_chloroplast$Genus, " sp."),
ifelse(!(Family %in% c("")),paste0("Family ", PC_loadings_chloroplast$Family),
ifelse(!(Order %in% c("")),paste0("Order ", PC_loadings_chloroplast$Order),
ifelse(!(Subclass %in% c("")),paste0("Subclass ", PC_loadings_chloroplast$Subclass),
ifelse(!(Class %in% c("")),paste0("Class ", PC_loadings_chloroplast$Class),
ifelse(!(Phylum %in% c("")),paste0("Phylum ", PC_loadings_chloroplast$Phylum),
"Unassigned")))))))) -> PC_loadings_chloroplast
PC_loadings_chloroplast %>% mutate(.,group =
# COI
ifelse(Phylum %in% c("Bacillariophyta"), "Diatoms",
ifelse(Class %in% c("Bacillariophyta"), "Diatoms",
ifelse(Order %in% c("Isochrysidales"), "Coccolithophores",
ifelse(Class %in% c("Dinophyceae"),"Dinoflagellates",
ifelse(Phylum %in% c("Rhodophyta"),"Rhodophytes",
ifelse(Phylum %in% c("Rotifera"),"Rotifers",
ifelse(Family %in% c("Chrysochromulinaceae"),"Prymnesiales",
ifelse(Class %in% c("Oomycetes"),"Oomycetes",
ifelse(Class %in% c("Hexanauplia"),"Copepods",
ifelse(Family %in% c("Cafeteriaceae"),"Bicosoecids",
ifelse(Family %in% c("Vexilliferidae"),"Amoebozoa",
ifelse(Family %in% c("Paulinellidae"),"Cercozoa",
ifelse(Family %in% c("Pycnococcaceae"),"Chlorophyta",
# Chloroplast
ifelse(Class %in% c("Cryptophyceae"),"Cryptophytes",
ifelse(Class %in% c("Embryophyceae"), "Embryophytes",
ifelse(Class %in% c("Dictyochophyceae"), "Silicoflagellates",
ifelse(Family %in% c("Chrysochromulinaceae"),"Prymnesiales",
ifelse(Class %in% c("Trebouxiophyceae"),"Chlorophyta",
"other"))))))))))))))))))) -> PC_loadings_chloroplast
### Normalize each sample to 1, have the proportion of reads be for each sample
chloroplast_asv_table <- as.data.frame(otu_table(chloroplast_16_phyloseq))
chloroplast_asv_table_matrix <- as.matrix(chloroplast_asv_table)
chloroplast_sample_sums <- as.vector(colSums(chloroplast_asv_table))
t(t(chloroplast_asv_table_matrix)/chloroplast_sample_sums) -> chloroplast_normalized_asv_table
chloroplast_normalized_asv_table <- as.data.frame(chloroplast_normalized_asv_table)
# Subset the ASV table based on a relative abundance threshold in multiple samples
# The problem with this approach is that it can eliminate taxa that don't have a lot of reads because of amplification bias, like Akashiwo
## Set threshold to at least 0.001 in at least 4 samples -> gives us 88 ASVs. Totally manageable. This is loose enough to keep Akashiwo in the picture.
# Add a column to the normalized asv table to store number of values over our cutoff (in this case 0.001)
chloroplast_normalized_asv_table %>% mutate(.,novercutoff = 0) -> chloroplast_normalized_asv_table
for (i in seq(1:nrow(chloroplast_normalized_asv_table))){
chloroplast_normalized_asv_table$novercutoff[i] = sum(chloroplast_normalized_asv_table[i,] > 0.001)
}
chloroplast_normalized_asv_table_subset <- subset(chloroplast_normalized_asv_table, novercutoff >= 4)
chloroplast_normalized_asv_table_subset <- chloroplast_normalized_asv_table_subset[!(names(chloroplast_normalized_asv_table_subset) %in% c("novercutoff"))]
# Save ASVs to keep as a list:
abundant_ASVs <- rownames(chloroplast_normalized_asv_table_subset)
PC_loadings_chloroplast_subset <- subset(PC_loadings_chloroplast, FeatureID %in% abundant_ASVs)
chloroplast_PCA_gg <- ggplot(PC_loadings_chloroplast_subset,aes(x = reorder(FeatureID,PC1),y = PC1, fill = Family))+
geom_bar(stat = "identity")+
ylim(-0.31,0.31)+
theme(axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.ticks.length.x=unit(0, "cm"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
# legend.position = "none",
axis.text.y = element_text(size = 15),
axis.ticks.length.y = unit(0.25,"cm"),
axis.title.y = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank())+
# scale_fill_manual(values = PC_loadings_colors)+
annotate("text", x=Inf, y = Inf, label = "(a)", vjust=1.5, hjust=1.25,size = 20)
chloroplast_PCA_gg
# Plot top 15 positive and negative, all reads
PC_loadings_chloroplast_30 <- PC_loadings_chloroplast_subset[c(1:15, (nrow(PC_loadings_chloroplast_subset)-14):nrow(PC_loadings_chloroplast_subset)),]
chloroplast_PCA_gg <- ggplot(PC_loadings_chloroplast_30,aes(x = reorder(FeatureID,PC1),y = PC1, fill = group))+
geom_bar(stat = "identity")+
coord_flip()+
geom_text(data = PC_loadings_chloroplast_30,aes(x = reorder(FeatureID,PC1),y = ifelse(PC1 > 0, -0.01,0.01), label = taxonomy, hjust = ifelse(PC1 > 0, 1,0),color = ifelse(taxonomy =="Unassigned","gray60","black")), size = 5,fontface = "bold")+
ylim(-0.4,0.4)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.ticks.length.y=unit(0, "cm"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
legend.position = "none",
axis.text.x = element_text(size = 15),
axis.ticks.length.x = unit(0.25,"cm"),
axis.title.x = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
plot.margin = margin(0, 0, 0, 0.95, "cm"),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank())+
scale_fill_manual(values = group_colors)+
scale_color_manual(values = c("black","gray60"))+
annotate("text", x=Inf, y = Inf, label = "(g)", vjust=1.5, hjust=1.25,size = 12)
chloroplast_PCA_gg
ggsave(here::here("figures", "PCA_figure", "16S_plastidial_PC1_unmerged_top30_092520.png"),chloroplast_PCA_gg,width = 7, height = 8.5)
pco<-read_qza(here::here("local_deicode_runs", "16S_bacterial", "ordination.qza"))
PC_loadings_bacteria <- pco$data$Species
kosmos_bacteria_tax_table <- dplyr::rename(bacteria_16_tax_table,FeatureID = "#OTUID")
PC_loadings_bacteria <- left_join(PC_loadings_bacteria,kosmos_bacteria_tax_table,by = "FeatureID")
# Change all taxonomic annotations to character
PC_loadings_bacteria$Kingdom <- as.character(PC_loadings_bacteria$Kingdom)
PC_loadings_bacteria$Phylum <- as.character(PC_loadings_bacteria$Phylum)
PC_loadings_bacteria$Class <- as.character(PC_loadings_bacteria$Class)
PC_loadings_bacteria$Order <- as.character(PC_loadings_bacteria$Order)
PC_loadings_bacteria$Family <- as.character(PC_loadings_bacteria$Family)
PC_loadings_bacteria$Genus <- as.character(PC_loadings_bacteria$Genus)
PC_loadings_bacteria$Species <- as.character(PC_loadings_bacteria$Species)
# Based on David's note, change the taxonomy of the "unassigned" bacterial ASV to Class Alphaproteobacteria, Phylum Proteobacteria, Kingdom Bacteria
PC_loadings_bacteria <- within(PC_loadings_bacteria, Kingdom[FeatureID == 'ec69e4a2fa57ecb7e50cf2e897b37a8e$Unassigned'] <- 'Bacteria')
PC_loadings_bacteria <- within(PC_loadings_bacteria, Phylum[FeatureID == 'ec69e4a2fa57ecb7e50cf2e897b37a8e$Unassigned'] <- 'Proteobacteria')
PC_loadings_bacteria <- within(PC_loadings_bacteria, Class[FeatureID == 'ec69e4a2fa57ecb7e50cf2e897b37a8e$Unassigned'] <- 'Alphaproteobacteria')
# Sort by PC1 loadings
PC_loadings_bacteria <- PC_loadings_bacteria[order(-PC_loadings_bacteria$PC1),]
PC_loadings_bacteria %>% mutate(.,taxonomy = ifelse(!(Species %in% c("","uncultured_bacterium","uncultured_marine_bacterium","uncultured")),Species,
ifelse(!(Genus %in% c("","uncultured_bacterium","uncultured_marine_bacterium","uncultured")),paste0(PC_loadings_bacteria$Genus),
# ifelse(!(Genus %in% c("","uncultured_bacterium","uncultured_marine_bacterium","uncultured")),paste0(PC_loadings_bacteria$Genus, " sp."),
ifelse(!(Family %in% c("","uncultured_bacterium","uncultured_marine_bacterium","uncultured")),paste0("Family ", PC_loadings_bacteria$Family),
ifelse(!(Order %in% c("","uncultured_bacterium","uncultured_marine_bacterium","uncultured")),paste0("Order ", PC_loadings_bacteria$Order),
ifelse(!(Class %in% c("")),paste0("Class ", PC_loadings_bacteria$Class),
ifelse(!(Phylum %in% c("")),paste0("Phylum ", PC_loadings_bacteria$Phylum),
ifelse(!(Kingdom %in% c("","Unassigned")),paste0("Kingdom ", PC_loadings_bacteria$Kingdom),"Unassigned")))))))) -> PC_loadings_bacteria
PC_loadings_bacteria %>% mutate(.,group =
# COI
ifelse(Phylum %in% c("Bacillariophyta"), "Diatoms",
ifelse(Order %in% c("Isochrysidales"), "Coccolithophores",
ifelse(Class %in% c("Dinophyceae"),"Dinoflagellates",
ifelse(Phylum %in% c("Rhodophyta"),"Rhodophytes",
ifelse(Phylum %in% c("Rotifera"),"Rotifers",
ifelse(Family %in% c("Chrysochromulinaceae"),"Chrysochromulina",
ifelse(Class %in% c("Oomycetes"),"Oomycetes",
ifelse(Class %in% c("Hexanauplia"),"Copepods",
ifelse(Family %in% c("Cafeteriaceae"),"Bicosoecids",
ifelse(Family %in% c("Vexilliferidae"),"Amoebozoa",
ifelse(Family %in% c("Paulinellidae"),"Cercozoa",
ifelse(Family %in% c("Pycnococcaceae"),"Chlorophyta",
# Chloroplast
ifelse(Class %in% c("Cryptophyceae"),"Cryptophytes",
ifelse(Class %in% c("Embryophyceae"), "Embryophytes",
ifelse(Class %in% c("Dictyophyceae"), "Silicoflagellates",
ifelse(Family %in% c("Chrysochromulinaceae"),"Chrysochromulina",
ifelse(Class %in% c("Trebouxiophyceae"),"Chlorophyta",
"other")))))))))))))))))) -> PC_loadings_bacteria
# write.csv(PC_loadings_bacteria,"/Users/mmin/Documents/KOSMOS_data_local/PC_loadings_analysis/PC_loadings_bacteria.csv")
### Normalize each sample to 1, have the proportion of reads be for each sample
bacteria_asv_table <- as.data.frame(otu_table(bacteria_16_phyloseq))
bacteria_asv_table_matrix <- as.matrix(bacteria_asv_table)
bacteria_sample_sums <- as.vector(colSums(bacteria_asv_table))
t(t(bacteria_asv_table_matrix)/bacteria_sample_sums) -> bacteria_normalized_asv_table
bacteria_normalized_asv_table <- as.data.frame(bacteria_normalized_asv_table)
# Subset the ASV table based on a relative abundance threshold in multiple samples
# The problem with this approach is that it can eliminate taxa that don't have a lot of reads because of amplification bias, like Akashiwo
## Set threshold to at least 0.001 in at least 4 samples -> gives us 88 ASVs. Totally manageable. This is loose enough to keep Akashiwo in the picture.
# Add a column to the normalized asv table to store number of values over our cutoff (in this case 0.001)
bacteria_normalized_asv_table <- rownames_to_column(bacteria_normalized_asv_table, "ASV")
bacteria_normalized_asv_table %>% mutate(.,novercutoff = 0) -> bacteria_normalized_asv_table
bacteria_normalized_asv_table <- column_to_rownames(bacteria_normalized_asv_table, "ASV")
for (i in seq(1:nrow(bacteria_normalized_asv_table))){
bacteria_normalized_asv_table$novercutoff[i] = sum(bacteria_normalized_asv_table[i,] > 0.001)
}
bacteria_normalized_asv_table_subset <- subset(bacteria_normalized_asv_table, novercutoff >= 4)
bacteria_normalized_asv_table_subset <- bacteria_normalized_asv_table_subset[!(names(bacteria_normalized_asv_table_subset) %in% c("novercutoff"))]
# Save ASVs to keep as a list:
abundant_ASVs <- rownames(bacteria_normalized_asv_table_subset)
PC_loadings_bacteria_subset <- subset(PC_loadings_bacteria, FeatureID %in% abundant_ASVs)
bacteria_PCA_gg <- ggplot(PC_loadings_bacteria_subset,aes(x = reorder(FeatureID,PC1),y = PC1, fill = Family))+
geom_bar(stat = "identity")+
ylim(-0.31,0.31)+
theme(axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.ticks.length.x=unit(0, "cm"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
# legend.position = "none",
axis.text.y = element_text(size = 15),
axis.ticks.length.y = unit(0.25,"cm"),
axis.title.y = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank())+
# scale_fill_manual(values = PC_loadings_colors)+
annotate("text", x=Inf, y = Inf, label = "(a)", vjust=1.5, hjust=1.25,size = 20)
bacteria_PCA_gg
# Plot top 15 positive and negative, all ASVs
PC_loadings_bacteria_30 <- PC_loadings_bacteria_subset[c(1:15, (nrow(PC_loadings_bacteria_subset)-14):nrow(PC_loadings_bacteria_subset)),]
# Change some labels
## Marinimicrobia
PC_loadings_bacteria_30 %>% mutate(.,Phylum = ifelse(Phylum == "Marinimicrobia_(SAR406_clade)","Marinimicrobia",
ifelse(Phylum == "","other", Phylum))) -> PC_loadings_bacteria_30
PC_loadings_bacteria_30 %>% mutate(.,taxonomy = ifelse(taxonomy == "Phylum Marinimicrobia_(SAR406_clade)","Phylum Marinimicrobia",taxonomy)) -> PC_loadings_bacteria_30
## Clade la
PC_loadings_bacteria_30 %>% mutate(.,taxonomy = ifelse(taxonomy == "Clade_Ia","SAR11 Ia",taxonomy)) -> PC_loadings_bacteria_30
## NS9_marine_group
PC_loadings_bacteria_30 %>% mutate(.,taxonomy = ifelse(taxonomy == "Family NS9_marine_group","NS9_marine_group",taxonomy)) -> PC_loadings_bacteria_30
## SAR86
PC_loadings_bacteria_30 %>% mutate(.,taxonomy = ifelse(taxonomy == "Order SAR86_clade","SAR86 clade",taxonomy)) -> PC_loadings_bacteria_30
# Change the label of Synechococcus CC9902, as suggested by Sebastian
PC_loadings_bacteria_30 %>% mutate(.,taxonomy = ifelse(taxonomy == "Synechococcus_CC9902","Family Synechococcaceae",taxonomy)) -> PC_loadings_bacteria_30
# Remove underscores from group labels
PC_loadings_bacteria_30 %>% mutate(.,taxonomy = gsub("_"," ",taxonomy)) -> PC_loadings_bacteria_30
bacteria_PCA_gg <- ggplot(PC_loadings_bacteria_30,aes(x = reorder(FeatureID,PC1),y = PC1, fill = Phylum))+
geom_bar(stat = "identity")+
coord_flip()+
geom_text(data = PC_loadings_bacteria_30,aes(x = reorder(FeatureID,PC1),y = ifelse(PC1 > 0, -0.01,0.01), label = taxonomy, hjust = ifelse(PC1 > 0, 1,0),color = ifelse(taxonomy =="Unassigned","gray60","black")),size = 5,fontface = ifelse(!(PC_loadings_bacteria_30$Genus %in% c("","Synechococcus_CC9902")|grepl("group",PC_loadings_bacteria_30$Genus)|grepl("clade",PC_loadings_bacteria_30$Genus,ignore.case = TRUE)),"bold.italic","bold"))+
ylim(-0.21,0.21)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.ticks.length.y=unit(0, "cm"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
legend.position = "none",
axis.text.x = element_text(size = 15),
axis.ticks.length.x = unit(0.25,"cm"),
axis.title.x = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
plot.margin = margin(0, 0, 0, 0.95, "cm"),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank())+
scale_fill_manual(values = group_colors)+
scale_color_manual(values = c("black","gray60"))+
annotate("text", x=Inf, y = Inf, label = "(h)", vjust=1.5, hjust=1.25,size = 12)
bacteria_PCA_gg
ggsave(here::here("figures", "PCA_figure", "16S_bacterial_PC1_unmerged_top30_100520.png"),bacteria_PCA_gg,width = 7, height = 8.5)
kosmos_12S_sample_sums <- as.data.frame(sample_sums(kosmos_12S_16_phyloseq))
kosmos_12S_sample_sums <- rownames_to_column(kosmos_12S_sample_sums,"sample")
colnames(kosmos_12S_sample_sums)[2] <- "total_reads"
kosmos_12S_sample_sums %>% mutate(.,sample_name = substr(gsub("KOSMOS","",sample),1,nchar(gsub("KOSMOS","",sample))-5)) -> kosmos_12S_sample_sums
# Add a mesocosm vs. Pacific column
kosmos_12S_sample_sums$sample_name <- as.character(kosmos_12S_sample_sums$sample_name)
kosmos_12S_sample_sums %>% mutate(.,station = substr(kosmos_12S_sample_sums$sample_name,nchar(kosmos_12S_sample_sums$sample_name)-1,nchar(kosmos_12S_sample_sums$sample_name))) -> kosmos_12S_sample_sums
kosmos_12S_sample_sums$sample_name <- factor(kosmos_12S_sample_sums$sample_name, levels = c("D1_M1", "D1_MP", "D8_M1", "D8_MP", "D15_M1", "D15_MP", "D24_M1", "D24_MP", "D32_M1", "D32_MP", "D36_M1", "D36_MP", "D42_M1", "D42_MP", "D48_M1", "D48_MP"))
kosmos_12S_sample_sums_gg <- ggplot(kosmos_12S_sample_sums,aes(x = sample_name,y = total_reads,fill = station))+
geom_bar(stat = "identity")+
ggtitle("12S reads per sample")+
theme(axis.text.y = element_text(size = 15),
axis.text.x = element_text(size = 15,angle = 90),
axis.title = element_text(size = 18),
plot.title = element_text(size = 25))+
ylab("Total Reads")+
xlab("Sample")
kosmos_12S_sample_sums_gg
ggsave(here::here("figures", "12S", "12S_sample_sums.png"),kosmos_12S_sample_sums_gg,width = 10, height = 6)
kosmos_12S_tax_table <- as.data.frame(as(tax_table(kosmos_12S_16_phyloseq),"matrix"))
kosmos_12S_tax_table <- rownames_to_column(kosmos_12S_tax_table,"ASV")
kosmos_12S_tax_table %>% mutate_all(as.character) -> kosmos_12S_tax_table
# Sum of reads by ASV
kosmos_12S_taxa_sums <- as.data.frame(taxa_sums(kosmos_12S_16_phyloseq))
kosmos_12S_taxa_sums <- rownames_to_column(kosmos_12S_taxa_sums,"ASV")
colnames(kosmos_12S_taxa_sums)[2] <- "total_reads"
kosmos_12S_taxa_sums <- left_join(kosmos_12S_taxa_sums, kosmos_12S_tax_table, by = "ASV")
kosmos_12S_taxa_sums <- kosmos_12S_taxa_sums[order(-kosmos_12S_taxa_sums$total_reads),]
# Let's group by taxonomy and sum
dplyr::select(kosmos_12S_taxa_sums,-ASV) %>% group_by(Kingdom,Phylum, Class,Order, Family, Genus, Species) %>% summarise_all(funs(sum)) -> kosmos_12S_taxa_sums_grouped
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
kosmos_12S_taxa_sums_grouped <- kosmos_12S_taxa_sums_grouped[order(-kosmos_12S_taxa_sums_grouped$total_reads),]
write.csv(kosmos_12S_taxa_sums_grouped,here::here("figures", "12S", "kosmos_12S_taxa_sums.csv"))
kosmos_12S_16_phyloseq_chordata <- subset_taxa(kosmos_12S_16_phyloseq, Phylum == "Chordata")
# Remove Paralichthys reads
kosmos_12S_16_phyloseq_chordata <- subset_taxa(kosmos_12S_16_phyloseq_chordata, Genus != "Paralichthys")
kosmos_12S_16_phyloseq_chordata_merged <- tax_glom(kosmos_12S_16_phyloseq_chordata,taxrank = rank_names(kosmos_12S_16_phyloseq_chordata)[7],NArm = FALSE)
kosmos_12S_chordata_merged_asv_table <- as.data.frame(otu_table(kosmos_12S_16_phyloseq_chordata_merged))
kosmos_12S_chordata_merged_asv_table <- rownames_to_column(kosmos_12S_chordata_merged_asv_table,"ASV")
kosmos_12S_chordata_merged_tax_table <- as.data.frame(as(tax_table(kosmos_12S_16_phyloseq_chordata_merged),"matrix"))
kosmos_12S_chordata_merged_tax_table <- rownames_to_column(kosmos_12S_chordata_merged_tax_table,"ASV")
kosmos_12S_chordata_merged_tax_table %>% mutate_all(as.character) -> kosmos_12S_chordata_merged_tax_table
kosmos_12S_chordata_merged_combined_table <- left_join(kosmos_12S_chordata_merged_asv_table,kosmos_12S_chordata_merged_tax_table,by = "ASV")
# Create a new taxonomy field, with best taxonomy (family or below)
kosmos_12S_chordata_merged_combined_table %>% mutate(.,taxonomy = ifelse(!(Species %in% c("s_","unassigned")),Species,
ifelse(!(Genus %in% c("g_","unassigned")),Genus,
ifelse(!(Family %in% c("unassigned")),Family,
ifelse(!(Order %in% c("unassigned")),Order,"unassigned"))))) -> kosmos_12S_chordata_merged_combined_table
# Sum of reads by taxonomy
kosmos_12S_chordata_merged_taxa_sums <- as.data.frame(taxa_sums(kosmos_12S_16_phyloseq_chordata_merged))
kosmos_12S_chordata_merged_taxa_sums <- rownames_to_column(kosmos_12S_chordata_merged_taxa_sums,"ASV")
colnames(kosmos_12S_chordata_merged_taxa_sums)[2] <- "total_reads"
kosmos_12S_chordata_merged_taxa_sums <- kosmos_12S_chordata_merged_taxa_sums[order(-kosmos_12S_chordata_merged_taxa_sums$total_reads),]
kosmos_12S_chordata_merged_taxa_sums <- left_join(kosmos_12S_chordata_merged_taxa_sums, kosmos_12S_chordata_merged_tax_table, by = "ASV")
# Determine top 19 taxa, so the rest can be "other
top19_kosmos_12S_taxa <- kosmos_12S_chordata_merged_taxa_sums$ASV[1:19]
kosmos_12S_chordata_merged_combined_table %>% mutate(.,taxonomy_updated = ifelse(ASV %in% top19_kosmos_12S_taxa,taxonomy,"other")) -> kosmos_12S_chordata_merged_combined_table
# Convert to long form
gathercols <- colnames(dplyr::select(kosmos_12S_chordata_merged_asv_table,-ASV))
kosmos_12S_chordata_merged_combined_table_long <- gather(kosmos_12S_chordata_merged_combined_table,key = "sample",value = "reads",gathercols,factor_key = TRUE)
# Shorten sample names
kosmos_12S_chordata_merged_combined_table_long$sample <- as.character(kosmos_12S_chordata_merged_combined_table_long$sample)
kosmos_12S_chordata_merged_combined_table_long %>% mutate(.,sample_name = substr(gsub("KOSMOS","",sample),1,nchar(gsub("KOSMOS","",sample))-5)) -> kosmos_12S_chordata_merged_combined_table_long
kosmos_12S_chordata_merged_combined_table_long %>% mutate(.,station = substr(kosmos_12S_chordata_merged_combined_table_long$sample_name,nchar(kosmos_12S_chordata_merged_combined_table_long$sample_name)-1,nchar(kosmos_12S_chordata_merged_combined_table_long$sample_name))) -> kosmos_12S_chordata_merged_combined_table_long
kosmos_12S_chordata_merged_combined_table_long$sample_name <- factor(kosmos_12S_chordata_merged_combined_table_long$sample_name, levels = c("D1_M1", "D1_MP", "D8_M1", "D8_MP", "D15_M1", "D15_MP", "D24_M1", "D24_MP", "D32_M1", "D32_MP", "D36_M1", "D36_MP", "D42_M1", "D42_MP", "D48_M1", "D48_MP"))
# Create a new data frame with total reads for each sample in order to get one border around each bar
dplyr::select(kosmos_12S_chordata_merged_combined_table_long,-c(ASV,Kingdom,Phylum, Class,Order, Family, Genus, Species,taxonomy,taxonomy_updated,sample,station)) %>% group_by(sample_name) %>% summarise_all(funs(sum)) -> kosmos_12S_chordata_sample_sums
colnames(kosmos_12S_chordata_sample_sums)[2] <- "total_reads_per_sample"
# Add these sample sums to original data frame
kosmos_12S_chordata_merged_combined_table_long <- left_join(kosmos_12S_chordata_merged_combined_table_long,kosmos_12S_chordata_sample_sums,by = "sample_name")
# Make a day column
kosmos_12S_chordata_merged_combined_table_long %>% mutate(.,day = gsub("_.*","",sample_name)) -> kosmos_12S_chordata_merged_combined_table_long
kosmos_12S_chordata_merged_combined_table_long$day <- factor(kosmos_12S_chordata_merged_combined_table_long$day,levels = c("D1","D8","D15","D24","D32","D36","D42","D48"))
# Re-order taxonomy_updated, so that other is last
kosmos_12S_chordata_merged_combined_table_long$taxonomy_updated = factor(kosmos_12S_chordata_merged_combined_table_long$taxonomy_updated, levels = c(sort(unique(kosmos_12S_chordata_merged_combined_table_long$taxonomy_updated)[!unique(kosmos_12S_chordata_merged_combined_table_long$taxonomy_updated) == "other"]),"other"))
# Determine top 10 genera
# dplyr::select(kosmos_M1_12S_taxa_sums,-ASV) %>% group_by(Kingdom,Phylum, Class,Order, Family, Genus, Species) %>% summarise_all(funs(sum)) -> kosmos_M1_12S_taxa_sums_grouped
kosmos_12S_chordata_merged_combined_table_long %>% mutate(.,taxonomy_genus = ifelse(!(Genus %in% c("g_","unassigned")),Genus,
ifelse(!(Family %in% c("unassigned")),paste0("Family ",Family),"unassigned"))) -> kosmos_12S_chordata_merged_combined_table_long
kosmos_12S_chordata_merged_taxa_sums %>% mutate(.,taxonomy_genus = ifelse(!(Genus %in% c("g_","unassigned")),Genus,
ifelse(!(Family %in% c("unassigned")),paste0("Family ",Family),"unassigned"))) -> kosmos_12S_chordata_merged_taxa_sums
dplyr::select(kosmos_12S_chordata_merged_taxa_sums,-c(ASV,Kingdom,Phylum, Class,Order, Genus, Family, Species)) %>% group_by(taxonomy_genus) %>% summarise_all(funs(sum)) -> kosmos_12S_chordata_genus_sums
kosmos_12S_chordata_genus_sums <- kosmos_12S_chordata_genus_sums[order(-kosmos_12S_chordata_genus_sums$total_reads),]
top12_genera <- kosmos_12S_chordata_genus_sums$taxonomy_genus[1:11]
kosmos_12S_chordata_merged_combined_table_long %>% mutate(.,genus_updated = ifelse(taxonomy_genus %in% top12_genera,taxonomy_genus,"other")) -> kosmos_12S_chordata_merged_combined_table_long
kosmos_12S_chordata_merged_combined_table_long$genus_updated <- factor(kosmos_12S_chordata_merged_combined_table_long$genus_updated, levels = c("Engraulis", "Family Sciaenidae", "Family Engraulidae", "Odontesthes", "Family Haemulidae", "Ethmidium", "Bairdiella", "Citharichthys", "Sardinops", "Scomber", "Mugil","other", "unassigned"))
inca_tern <- readPNG(here::here("figures", "phylopic", "inca_tern.png"))
inca_tern_img <- rasterGrob(inca_tern, interpolate=TRUE)
pelican <- readPNG(here::here("figures", "phylopic", "pelican.png"))
pelican_img <- rasterGrob(pelican, interpolate=TRUE)
kosmos_12S_barplot <- ggplot(subset(kosmos_12S_chordata_merged_combined_table_long,station == "M1"), aes(x = day, y = reads,fill = genus_updated))+
geom_bar(stat = "identity")+
scale_fill_tableau(palette = "Tableau 20")+
ylab("Total Reads")+
xlab("Mesocosm Sample")+
guides(color=FALSE,fill=guide_legend(title="Genus"))+
theme(panel.border = element_rect(colour = "black", fill=NA, size=1),
# legend.position = "none",
axis.text = element_text(size = 15),
axis.ticks.length.y = unit(0.25,"cm"),
axis.title = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank(),
legend.position = "none")+
scale_y_continuous(expand = c(0, 0),lim = c(0,22000))+
annotate("text", x=-Inf, y = Inf, label = "(b)", vjust=1.5, hjust=-0.25,size = 12)+
# Add bird pictures from phylopic
annotation_custom(pelican_img, xmin=5.5, xmax=6.5, ymin= 18030, ymax=21030) +
annotation_custom(inca_tern_img, xmin=6.5, xmax=7.5, ymin= 6228, ymax=9228) +
annotation_custom(inca_tern_img, xmin=7.5, xmax=8.5, ymin= 16916, ymax=19916)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
kosmos_12S_barplot
ggsave(here::here("figures", "12S", "12S_mesocosm_barplot_no_paralichthys_genus_plus_birds_090220.png"),kosmos_12S_barplot,width = 8, height = 3.5)
kosmos_12S_barplot <- ggplot(subset(kosmos_12S_chordata_merged_combined_table_long,station == "MP"), aes(x = day, y = reads,fill = genus_updated))+
geom_bar(stat = "identity")+
scale_fill_tableau(palette = "Tableau 20")+
ylab("Total Reads")+
xlab("Pacific Sample")+
guides(color=FALSE,fill=guide_legend(title="Genus"))+
theme(panel.border = element_rect(colour = "black", fill=NA, size=1),
# legend.position = "none",
axis.text = element_text(size = 15),
axis.ticks.length.y = unit(0.25,"cm"),
axis.title = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank(),
legend.position = "none")+
scale_y_continuous(expand = c(0, 0),lim = c(0,22000))+
annotate("text", x=-Inf, y = Inf, label = "(a)", vjust=1.5, hjust=-0.25,size = 12)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
kosmos_12S_barplot
ggsave(here::here("figures", "12S", "12S_pacific_barplot_noparalichthys_genus.png"),kosmos_12S_barplot,width = 8, height = 3.5)
fakedata <- rep(0.05,12)
fakesample <- rep("nosample",12)
legend_data <- data.frame(allgroups,fakedata,fakesample)
legend_data$allgroups <- factor(unique(kosmos_12S_chordata_merged_combined_table_long$genus_updated), levels = c("Engraulis", "Family Sciaenidae", "Family Engraulidae", "Odontesthes", "Family Haemulidae", "Ethmidium", "Bairdiella", "Citharichthys", "Sardinops", "Scomber", "Mugil","other", "unassigned"))
legend_gg<-ggplot(legend_data, aes(x=fakesample,y=fakedata,fill=allgroups))+
geom_bar(stat='identity')+theme_minimal()+
# scale_fill_tableau(palette = "Tableau 20")+
scale_fill_tableau(palette = "Tableau 20",
labels = c(expression(italic("Engraulis")), "Family Sciaenidae", "Family Engraulidae", expression(italic("Odontesthes")), "Family Haemulidae", expression(italic("Ethmidium")), expression(italic("Bairdiella")), expression(italic("Citharichthys")), expression(italic("Sardinops")), expression(italic("Scomber")), "other", "unassigned"))+
theme(axis.text.x = element_blank(),
legend.position = "right",
axis.title=element_text(size=8),
plot.margin = unit(c(0,5,0,0), "pt"),
legend.text=element_text(size=10),
legend.title = element_blank(),
legend.text.align = 0
)+
guides(fill=guide_legend(ncol=4))+
xlab("Sample")+
ylab("Proportion of Reads")
legend_gg
ggsave(here::here("figures", "12S", "12S_genus_barplot_legend.png"),legend_gg,width = 8, height = 2)
kosmos_12S_M1_genus_phyloseq <- tax_glom(kosmos_12S_M1_phyloseq,taxrank=rank_names(kosmos_12S_M1_phyloseq)[6], NArm = FALSE)
kosmos_12S_M1_genus_asv_table <- as.data.frame(as(otu_table(kosmos_12S_M1_genus_phyloseq),"matrix"))
kosmos_12S_M1_genus_asv_table <- rownames_to_column(kosmos_12S_M1_genus_asv_table,"ASV")
kosmos_12S_M1_genus_tax_table <- as.data.frame(as(tax_table(kosmos_12S_M1_genus_phyloseq),"matrix"))
kosmos_12S_M1_genus_tax_table <- rownames_to_column(kosmos_12S_M1_genus_tax_table,"ASV")
kosmos_12S_M1_genus_combined_table <- left_join(kosmos_12S_M1_genus_asv_table, kosmos_12S_M1_genus_tax_table, by = "ASV")
# Convert to long form
gathercols <- colnames(dplyr::select(kosmos_12S_M1_genus_asv_table,-ASV))
kosmos_12S_M1_genus_combined_table_gather <- gather(kosmos_12S_M1_genus_combined_table, key = "sample", value = "reads", gathercols, factor_key=TRUE)
kosmos_12S_M1_genus_combined_table_gather$sample <- as.character(kosmos_12S_M1_genus_combined_table_gather$sample)
kosmos_12S_M1_genus_combined_table_gather %>% mutate(.,day = gsub("_.*","", gsub("KOSMOSD","",sample))) -> kosmos_12S_M1_genus_combined_table_gather
paralichthys_M1_long <- subset(kosmos_12S_M1_genus_combined_table_gather, Genus == "Paralichthys")
paralichthys_M1_long$sample <- factor(paralichthys_M1_long$sample, levels = c("KOSMOSD1_M1BS", "KOSMOSD8_M1AS", "KOSMOSD15_M1AS", "KOSMOSD24_M1AS", "KOSMOSD32_M1AS", "KOSMOSD36_M1AS", "KOSMOSD42_M1AS", "KOSMOSD48_M1AS"))
# paralichthys_M1_long$day <- factor(paralichthys_M1_long$day,levels = c("1","8","15","24","32","36","42","48"))
paralichthys_M1_long$day <- as.numeric(as.character(paralichthys_M1_long$day))
paralichthys_M1_long %>% mutate(.,sample_name = paste0("D",day)) -> paralichthys_M1_long
paralichthys_M1_long$sample_name <- factor(paralichthys_M1_long$sample_name,levels = c("D1","D8","D15","D24","D32","D36","D42","D48"))
paralichthys_M1_gg <- ggplot(paralichthys_M1_long, aes(x = sample_name, y = reads,fill = Genus))+
geom_bar(stat = 'identity',width=0.35)+
xlab("Mesocosm Sample")+
ylab("Total Reads")+
geom_vline(xintercept=4.75,color="#e41a1c",lty=2,size = 1.2)+
scale_fill_manual(values = c("black"))+
scale_y_continuous(expand = c(0, 0),lim = c(0,8500))+
# annotate(geom = "text",x = 4.65, y = 4250,angle = 90, color = "#e41a1c", size = 2, label = expression(paste(italic("Paralichthys adspersus")," eggs added on day 31")))+
annotate(geom = "text",x = 4.38, y = 4250,angle = 90, color = "#e41a1c", size = 6, label = expression(paste(italic("Paralichthys adspersus"))))+
annotate(geom = "text",x = 4.57, y = 4250,angle = 90, color = "#e41a1c", size = 6, label = "eggs added on day 31")+
theme(panel.border = element_rect(colour = "black", fill=NA, size=1),
# legend.position = "none",
axis.text = element_text(size = 15),
axis.ticks.length.y = unit(0.25,"cm"),
axis.title = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank(),
legend.position = "none")+
annotate("text", x=-Inf, y = Inf, label = "(c)", vjust=1.5, hjust=-0.25,size = 12)+
annotate("rect",xmin = 1, xmax = 1.35, ymin = 5500, ymax = 6250,fill = "black",color = "black")+
annotate("text", x = 1.45, y = 5875, label = "Paralichthys",hjust = 0,size = 6)
paralichthys_M1_gg
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
ggsave(here::here("figures", "12S", "paralichthys_M1.png"),paralichthys_M1_gg,width = 8, height = 4)
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'